diff --git a/.clang-tidy b/.clang-tidy index 219ac263ab3..896052915f7 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -37,6 +37,7 @@ Checks: [ '-cert-oop54-cpp', '-cert-oop57-cpp', + '-clang-analyzer-optin.core.EnumCastOutOfRange', # https://github.com/abseil/abseil-cpp/issues/1667 '-clang-analyzer-optin.performance.Padding', '-clang-analyzer-unix.Malloc', @@ -94,6 +95,7 @@ Checks: [ '-modernize-pass-by-value', '-modernize-return-braced-init-list', '-modernize-use-auto', + '-modernize-use-constraints', # This is a good check, but clang-tidy crashes, see https://github.com/llvm/llvm-project/issues/91872 '-modernize-use-default-member-init', '-modernize-use-emplace', '-modernize-use-nodiscard', @@ -121,7 +123,8 @@ Checks: [ '-readability-magic-numbers', '-readability-named-parameter', '-readability-redundant-declaration', - '-readability-redundant-inline-specifier', + '-readability-redundant-inline-specifier', # useful but incompatible with __attribute((always_inline))__ (aka. ALWAYS_INLINE, base/base/defines.h). + # ALWAYS_INLINE only has an effect if combined with `inline`: https://godbolt.org/z/Eefd74qdM '-readability-redundant-member-init', # Useful but triggers another problem. Imagine a struct S with multiple String members. Structs are often instantiated via designated # initializer S s{.s1 = [...], .s2 = [...], [...]}. In this case, compiler warning `missing-field-initializers` requires to specify all members which are not in-struct # initialized (example: s1 in struct S { String s1; String s2{};}; is not in-struct initialized, therefore it must be specified at instantiation time). As explicitly @@ -132,12 +135,7 @@ Checks: [ '-readability-uppercase-literal-suffix', '-readability-use-anyofallof', - '-zircon-*', - - # This is a good check, but clang-tidy crashes, see https://github.com/llvm/llvm-project/issues/91872 - '-modernize-use-constraints', - # https://github.com/abseil/abseil-cpp/issues/1667 - '-clang-analyzer-optin.core.EnumCastOutOfRange' + '-zircon-*' ] WarningsAsErrors: '*' diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3e0131a388a..f9765c1d57b 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -42,47 +42,39 @@ At a minimum, the following information should be added (but add more as needed) > Information about CI checks: https://clickhouse.com/docs/en/development/continuous-integration/
- Modify your CI run + CI Settings **NOTE:** If you merge the PR with modified CI you **MUST KNOW** what you are doing **NOTE:** Checked options will be applied if set before CI RunConfig/PrepareRunConfig step - -#### Include tests (required builds will be added automatically): -- [ ] Fast test -- [ ] Integration Tests -- [ ] Stateless tests -- [ ] Stateful tests -- [ ] Unit tests -- [ ] Performance tests -- [ ] All with ASAN -- [ ] All with TSAN -- [ ] All with Analyzer -- [ ] All with Azure -- [ ] Add your option here - -#### Exclude tests: -- [ ] Fast test -- [ ] Integration Tests -- [ ] Stateless tests -- [ ] Stateful tests -- [ ] Performance tests -- [ ] All with ASAN -- [ ] All with TSAN -- [ ] All with MSAN -- [ ] All with UBSAN -- [ ] All with Coverage -- [ ] All with Aarch64 -- [ ] Add your option here - -#### Extra options: +- [ ] Allow: Integration Tests +- [ ] Allow: Stateless tests +- [ ] Allow: Stateful tests +- [ ] Allow: Unit tests +- [ ] Allow: Performance tests +- [ ] Allow: All with aarch64 +- [ ] Allow: All with ASAN +- [ ] Allow: All with TSAN +- [ ] Allow: All with Analyzer +- [ ] Allow: All with Azure +- [ ] Allow: Add your option here +--- +- [ ] Exclude: Fast test +- [ ] Exclude: Integration Tests +- [ ] Exclude: Stateless tests +- [ ] Exclude: Stateful tests +- [ ] Exclude: Performance tests +- [ ] Exclude: All with ASAN +- [ ] Exclude: All with TSAN +- [ ] Exclude: All with MSAN +- [ ] Exclude: All with UBSAN +- [ ] Exclude: All with Coverage +- [ ] Exclude: All with Aarch64 +--- - [ ] do not test (only style check) - [ ] disable merge-commit (no merge from master before tests) - [ ] disable CI cache (job reuse) - -#### Only specified batches in multi-batch jobs: -- [ ] 1 -- [ ] 2 -- [ ] 3 -- [ ] 4 - +- [ ] allow: batch 1 for multi-batch jobs +- [ ] allow: batch 2 +- [ ] allow: batch 3 +- [ ] allow: batch 4, 5 and 6
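The `.clang-tidy` hunk above keeps `readability-redundant-inline-specifier` disabled because, per its comment and the linked godbolt, `ALWAYS_INLINE` only takes effect when the function is also marked `inline`. A minimal C++ sketch of that interaction; the macro definition is an assumption modeled on what the comment attributes to `base/base/defines.h`, which is not shown in this diff:

```cpp
// Sketch only: ALWAYS_INLINE is assumed to expand roughly like this;
// the real macro lives in base/base/defines.h and is not part of this diff.
#define ALWAYS_INLINE __attribute__((__always_inline__))

// Per the comment above (and the godbolt link), the attribute alone is not
// enough: without `inline`, the forced-inlining guarantee does not kick in.
ALWAYS_INLINE int addPlain(int a, int b) { return a + b; }

// With `inline` added, the attribute does what the macro name promises,
// which is why the "redundant inline" suggestion would silently weaken it.
ALWAYS_INLINE inline int addForced(int a, int b) { return a + b; }

int main()
{
    return addPlain(1, 2) + addForced(3, 4);
}
```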
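The same hunk also keeps `readability-redundant-member-init` disabled; its comment is cut off by the diff context, but the conflict it describes can be sketched: with the `missing-field-initializers` warning treated as an error, a designated initializer has to list every member that lacks an in-class initializer, so the `{}` initializers the check would remove are load-bearing. A small sketch using the member names from the comment's own example:

```cpp
#include <string>

struct S
{
    std::string s1;     // no in-class initializer
    std::string s2{};   // in-class initialized; the check would call this {} redundant
};

// Per the comment: s1 must be listed explicitly because it has no in-class
// initializer, while s2 may be omitted thanks to its {}.
S ok{.s1 = "value"};
// S warns{.s2 = "value"};   // missing-field-initializers: s1 is not listed

// If the {} on s2 were removed, as readability-redundant-member-init suggests,
// every designated initializer that omits s2 would start warning as well.
```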
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index d2ea714e4e4..c2a893a8e99 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -27,15 +27,16 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 sync_pr.py --merge || : - - name: Python unit tests - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - echo "Testing the main ci directory" - python3 -m unittest discover -s . -p 'test_*.py' - for dir in *_lambda/; do - echo "Testing $dir" - python3 -m unittest discover -s "$dir" -p 'test_*.py' - done +# Runs in MQ: +# - name: Python unit tests +# run: | +# cd "$GITHUB_WORKSPACE/tests/ci" +# echo "Testing the main ci directory" +# python3 -m unittest discover -s . -p 'test_*.py' +# for dir in *_lambda/; do +# echo "Testing $dir" +# python3 -m unittest discover -s "$dir" -p 'test_*.py' +# done - name: PrepareRunConfig id: runconfig run: | @@ -53,13 +54,13 @@ jobs: - name: Re-create GH statuses for skipped jobs if any run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses - BuildDockers: - needs: [RunConfig] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_docker.yml - with: - data: ${{ needs.RunConfig.outputs.data }} - # Tested in MQ +# Runs in MQ: +# BuildDockers: +# needs: [RunConfig] +# if: ${{ !failure() && !cancelled() }} +# uses: ./.github/workflows/reusable_docker.yml +# with: +# data: ${{ needs.RunConfig.outputs.data }} # StyleCheck: # needs: [RunConfig, BuildDockers] # if: ${{ !failure() && !cancelled() }} @@ -70,262 +71,73 @@ jobs: # data: ${{ needs.RunConfig.outputs.data }} # run_command: | # python3 style_check.py --no-push - CompatibilityCheckX86: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml + + ################################# Main stages ################################# + # for main CI chain + # + Builds_1: + needs: [RunConfig] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_1') }} + # using callable wf (reusable_stage.yml) allows grouping all nested jobs under a tab + uses: ./.github/workflows/reusable_build_stage.yml with: - test_name: Compatibility check (amd64) - runner_type: style-checker + stage: Builds_1 data: ${{ needs.RunConfig.outputs.data }} - CompatibilityCheckAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml + Tests_1: + needs: [RunConfig, Builds_1] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_1') }} + uses: ./.github/workflows/reusable_test_stage.yml with: - test_name: Compatibility check (aarch64) - runner_type: style-checker + stage: Tests_1 data: ${{ needs.RunConfig.outputs.data }} -######################################################################################### -#################################### ORDINARY BUILDS #################################### -######################################################################################### -# TODO: never skip builds! 
- BuilderDebRelease: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml + Builds_2: + needs: [RunConfig, Builds_1] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_2') }} + uses: ./.github/workflows/reusable_build_stage.yml with: - build_name: package_release - checkout_depth: 0 + stage: Builds_2 data: ${{ needs.RunConfig.outputs.data }} - BuilderDebReleaseCoverage: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml + Tests_2: + needs: [RunConfig, Builds_2] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} + uses: ./.github/workflows/reusable_test_stage.yml with: - build_name: package_release_coverage - checkout_depth: 0 + stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} - BuilderDebAarch64: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml + # stage for jobs that do not prohibit merge + Tests_3: + needs: [RunConfig, Builds_1] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} + uses: ./.github/workflows/reusable_test_stage.yml with: - build_name: package_aarch64 - checkout_depth: 0 + stage: Tests_3 data: ${{ needs.RunConfig.outputs.data }} - BuilderBinRelease: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_release - checkout_depth: 0 # otherwise we will have no info about contributors - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebAsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_asan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebUBsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_ubsan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebTsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_tsan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebMsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_msan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebDebug: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_debug - data: ${{ needs.RunConfig.outputs.data }} -########################################################################################## -##################################### SPECIAL BUILDS ##################################### -########################################################################################## - BuilderBinClangTidy: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_tidy - data: ${{ needs.RunConfig.outputs.data }} - BuilderBinDarwin: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - 
with: - build_name: binary_darwin - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAarch64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_aarch64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinFreeBSD: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_freebsd - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinDarwinAarch64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_darwin_aarch64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinPPC64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_ppc64le - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAmd64Compat: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_amd64_compat - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAmd64Musl: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_amd64_musl - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAarch64V80Compat: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_aarch64_v80compat - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinRISCV64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_riscv64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinS390X: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_s390x - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinLoongarch64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_loongarch64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 -############################################################################################ -##################################### Docker images ####################################### -############################################################################################ - DockerServerImage: - needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Docker server image - runner_type: style-checker - data: ${{ needs.RunConfig.outputs.data }} - DockerKeeperImage: - needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Docker keeper image - runner_type: style-checker - data: ${{ needs.RunConfig.outputs.data }} 
-############################################################################################ -##################################### BUILD REPORTER ####################################### -############################################################################################ - BuilderReport: + + ################################# Reports ################################# + # Reports should be run even if Builds_1/2 failed - put them separately in wf (not in Tests_1/2) + Builds_1_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderDebAarch64 - - BuilderDebAsan - - BuilderDebDebug - - BuilderDebMsan - - BuilderDebRelease - - BuilderDebTsan - - BuilderDebUBsan + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} + needs: [RunConfig, Builds_1] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse build check runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} - BuilderSpecialReport: + Builds_2_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderBinAarch64 - - BuilderBinDarwin - - BuilderBinDarwinAarch64 - - BuilderBinFreeBSD - - BuilderBinPPC64 - - BuilderBinRISCV64 - - BuilderBinS390X - - BuilderBinLoongarch64 - - BuilderBinAmd64Compat - - BuilderBinAarch64V80Compat - - BuilderBinClangTidy - - BuilderBinAmd64Musl - - BuilderDebReleaseCoverage - - BuilderBinRelease + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }} + needs: [RunConfig, Builds_2] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse special build check runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} + MarkReleaseReady: if: ${{ !failure() && !cancelled() }} - needs: - - BuilderBinDarwin - - BuilderBinDarwinAarch64 - - BuilderDebRelease - - BuilderDebAarch64 - runs-on: [self-hosted, style-checker] + needs: [RunConfig, Builds_1, Builds_2] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Debug run: | @@ -338,7 +150,7 @@ jobs: no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} EOF - name: Not ready - # fail the job to be able restart it + # fail the job to be able to restart it if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }} run: exit 1 - name: Check out repository code @@ -349,544 +161,14 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 mark_release_ready.py -############################################################################################ -#################################### INSTALL PACKAGES ###################################### -############################################################################################ - InstallPackagesTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Install packages (amd64) - runner_type: style-checker - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 install_check.py "$CHECK_NAME" - InstallPackagesTestAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Install packages (arm64) 
- runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 install_check.py "$CHECK_NAME" -############################################################################################## -########################### FUNCTIONAl STATELESS TESTS ####################################### -############################################################################################## - FunctionalStatelessTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (release) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestReleaseAnalyzerS3Replicated: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (release, old analyzer, s3, DatabaseReplicated) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestS3Debug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (debug, s3 storage) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestS3Tsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (tsan, s3 storage) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (aarch64) - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (asan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (tsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (msan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (ubsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (debug) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestAsanAzure: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (azure, asan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} 
-############################################################################################## -############################ FUNCTIONAl STATEFUL TESTS ####################################### -############################################################################################## - FunctionalStatefulTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (release) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (aarch64) - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (asan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (tsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (msan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (ubsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (debug) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - # Parallel replicas - FunctionalStatefulTestDebugParallelReplicas: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (debug, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestUBsanParallelReplicas: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (ubsan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestMsanParallelReplicas: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (msan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestTsanParallelReplicas: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (tsan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestAsanParallelReplicas: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: 
./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (asan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestReleaseParallelReplicas: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (release, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -########################### ClickBench ####################################################### -############################################################################################## - ClickBenchAMD64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickBench (amd64) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 clickbench.py "$CHECK_NAME" - ClickBenchAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickBench (aarch64) - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 clickbench.py "$CHECK_NAME" -############################################################################################## -######################################### STRESS TESTS ####################################### -############################################################################################## - StressTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (asan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (tsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestTsanAzure: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (azure, tsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (msan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (ubsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (debug) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################# -############################# INTEGRATION TESTS ############################################# -############################################################################################# - IntegrationTestsAsan: - needs: 
[RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (asan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - IntegrationTestsAnalyzerAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (asan, old analyzer) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - IntegrationTestsTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (tsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - IntegrationTestsRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (release) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -##################################### AST FUZZERS ############################################ -############################################################################################## - ASTFuzzerTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (asan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (tsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestUBSan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (ubsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestMSan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (msan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (debug) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################# -#################################### UNIT TESTS ############################################# -############################################################################################# - UnitTestsAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (asan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsReleaseClang: - needs: [RunConfig, BuilderBinRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (release) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && 
!cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (tsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (msan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (ubsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################# -#################################### PERFORMANCE TESTS ###################################### -############################################################################################# - PerformanceComparisonX86: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Performance Comparison - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - PerformanceComparisonAarch: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Performance Comparison Aarch64 - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -############################ SQLLOGIC TEST ################################################### -############################################################################################## - SQLLogicTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Sqllogic test (release) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -##################################### SQL TEST ############################################### -############################################################################################## - SQLTest: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: SQLTest - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -###################################### SQLANCER FUZZERS ###################################### -############################################################################################## - SQLancerTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: SQLancer (release) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - SQLancerTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: SQLancer (debug) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} FinishCheck: - if: ${{ !failure() && !cancelled() }} - needs: - - MarkReleaseReady - - 
FunctionalStatelessTestDebug - - FunctionalStatelessTestRelease - - FunctionalStatelessTestReleaseAnalyzerS3Replicated - - FunctionalStatelessTestAarch64 - - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan - - FunctionalStatelessTestMsan - - FunctionalStatelessTestUBsan - - FunctionalStatelessTestS3Debug - - FunctionalStatelessTestS3Tsan - - FunctionalStatefulTestDebug - - FunctionalStatefulTestRelease - - FunctionalStatefulTestAarch64 - - FunctionalStatefulTestAsan - - FunctionalStatefulTestTsan - - FunctionalStatefulTestMsan - - FunctionalStatefulTestUBsan - - FunctionalStatefulTestDebugParallelReplicas - - FunctionalStatefulTestUBsanParallelReplicas - - FunctionalStatefulTestMsanParallelReplicas - - FunctionalStatefulTestTsanParallelReplicas - - FunctionalStatefulTestAsanParallelReplicas - - FunctionalStatefulTestReleaseParallelReplicas - - StressTestDebug - - StressTestAsan - - StressTestTsan - - StressTestMsan - - StressTestUBsan - - IntegrationTestsAsan - - IntegrationTestsAnalyzerAsan - - IntegrationTestsTsan - - IntegrationTestsRelease - - PerformanceComparisonX86 - - PerformanceComparisonAarch - - CompatibilityCheckX86 - - CompatibilityCheckAarch64 - - ASTFuzzerTestDebug - - ASTFuzzerTestAsan - - ASTFuzzerTestTsan - - ASTFuzzerTestMSan - - ASTFuzzerTestUBSan - - UnitTestsAsan - - UnitTestsTsan - - UnitTestsMsan - - UnitTestsUBsan - - UnitTestsReleaseClang - - SQLancerTestRelease - - SQLancerTestDebug - - SQLLogicTestRelease - - SQLTest - runs-on: [self-hosted, style-checker] + if: ${{ !cancelled() }} + needs: [RunConfig, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 - with: - clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index 1b6cc320ec4..d1b03198485 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -20,8 +20,11 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true # to ensure correct digests - fetch-depth: 0 # to get version + fetch-depth: 0 # to get a version filter: tree:0 + - name: Cancel PR workflow + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -57,7 +60,7 @@ jobs: uses: ./.github/workflows/reusable_test.yml with: test_name: Style check - runner_type: style-checker + runner_type: style-checker-aarch64 run_command: | python3 style_check.py data: ${{ needs.RunConfig.outputs.data }} @@ -82,7 +85,7 @@ jobs: FinishCheck: if: ${{ !failure() && !cancelled() }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest] - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index f20e987db97..7d22554473e 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -31,8 +31,14 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true # to ensure correct digests - fetch-depth: 0 # to get version + fetch-depth: 0 # to get a version filter: tree:0 + - name: Cancel previous Sync PR workflow + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run + - name: Set pending Sync status + run: | + python3 
"$GITHUB_WORKSPACE/tests/ci/ci.py" --set-pending-status - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -75,7 +81,7 @@ jobs: uses: ./.github/workflows/reusable_test.yml with: test_name: Style check - runner_type: style-checker + runner_type: style-checker-aarch64 run_command: | python3 style_check.py data: ${{ needs.RunConfig.outputs.data }} @@ -95,13 +101,13 @@ jobs: run_command: | python3 fast_test_check.py - ################################# Main statges ################################# + ################################# Main stages ################################# # for main CI chain # Builds_1: needs: [RunConfig, StyleCheck, FastTest] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_1') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab + # using callable wf (reusable_stage.yml) allows grouping all nested jobs under a tab uses: ./.github/workflows/reusable_build_stage.yml with: stage: Builds_1 @@ -109,7 +115,6 @@ jobs: Tests_1: needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_1') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab uses: ./.github/workflows/reusable_test_stage.yml with: stage: Tests_1 @@ -117,7 +122,6 @@ jobs: Builds_2: needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_2') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab uses: ./.github/workflows/reusable_build_stage.yml with: stage: Builds_2 @@ -125,7 +129,6 @@ jobs: Tests_2: needs: [RunConfig, Builds_2] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab uses: ./.github/workflows/reusable_test_stage.yml with: stage: Tests_2 @@ -177,9 +180,9 @@ jobs: ################################# Stage Final ################################# # FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 @@ -189,13 +192,6 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py - # FIXME: merge on approval does not work with MQ. 
Could be fixed by using defaul GH's automerge after some corrections in Mergeable Check status - # - name: Auto merge if approved - # if: ${{ github.event_name != 'merge_group' }} - # run: | - # cd "$GITHUB_WORKSPACE/tests/ci" - # python3 merge_pr.py --check-approved - ############################################################################################# ###################################### JEPSEN TESTS ######################################### @@ -213,5 +209,5 @@ jobs: uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse Keeper Jepsen - runner_type: style-checker + runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} diff --git a/.github/workflows/reusable_build.yml b/.github/workflows/reusable_build.yml index 80d78d93e1b..5e254d785ec 100644 --- a/.github/workflows/reusable_build.yml +++ b/.github/workflows/reusable_build.yml @@ -33,6 +33,10 @@ name: Build ClickHouse additional_envs: description: additional ENV variables to setup the job type: string + secrets: + secret_envs: + description: if given, it's passed to the environments + required: false jobs: Build: @@ -54,6 +58,7 @@ jobs: run: | cat >> "$GITHUB_ENV" << 'EOF' ${{inputs.additional_envs}} + ${{secrets.secret_envs}} DOCKER_TAG< - inline T allocateObjectForBorrowing(const std::unique_lock &, FactoryFunc && func) + T allocateObjectForBorrowing(const std::unique_lock &, FactoryFunc && func) { ++allocated_objects_size; ++borrowed_objects_size; @@ -137,7 +137,7 @@ private: return std::forward(func)(); } - inline T borrowFromObjects(const std::unique_lock &) + T borrowFromObjects(const std::unique_lock &) { T dst; detail::moveOrCopyIfThrow(std::move(objects.back()), dst); diff --git a/base/base/Decimal_fwd.h b/base/base/Decimal_fwd.h index beb228cea3c..a11e13a479b 100644 --- a/base/base/Decimal_fwd.h +++ b/base/base/Decimal_fwd.h @@ -44,6 +44,10 @@ concept is_over_big_int = || std::is_same_v || std::is_same_v || std::is_same_v; + +template +concept is_over_big_decimal = is_decimal && is_over_big_int; + } template <> struct is_signed { static constexpr bool value = true; }; diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index f8ff71876c6..dfbbb66a1e9 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
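The `base/base/Decimal_fwd.h` hunk above introduces an `is_over_big_decimal` concept built from the existing `is_decimal` and `is_over_big_int` traits. Below is a self-contained sketch of how such a concept is shaped and used; the types and trait definitions are simplified stand-ins rather than the real ClickHouse ones, and the exact form of the new concept is an assumption:

```cpp
#include <type_traits>

// Stand-ins for ClickHouse's decimal types; only the shape matters here.
struct Int256 {};
struct Decimal64  { using NativeType = long long; };
struct Decimal256 { using NativeType = Int256; };

// Simplified versions of the traits that Decimal_fwd.h already provides.
template <typename T> concept is_decimal      = requires { typename T::NativeType; };
template <typename T> concept is_over_big_int = std::is_same_v<T, Int256>;

// Assumed shape of the new concept: a decimal backed by a "big" (>64-bit) integer.
template <typename T>
concept is_over_big_decimal = is_decimal<T> && is_over_big_int<typename T::NativeType>;

static_assert(is_over_big_decimal<Decimal256>);
static_assert(!is_over_big_decimal<Decimal64>);

// Typical consumer: a constrained overload selected only for wide decimals.
template <is_over_big_decimal T>
int widePath(const T &) { return 256; }
```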
-SET(VERSION_REVISION 54486) +SET(VERSION_REVISION 54487) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 5) +SET(VERSION_MINOR 6) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 6d4b31322d168356c8b10c43b4cef157c82337ff) -SET(VERSION_DESCRIBE v24.5.1.1-testing) -SET(VERSION_STRING 24.5.1.1) +SET(VERSION_GITHASH 70a1d3a63d47f0be077d67b8deb907230fc7cfb0) +SET(VERSION_DESCRIBE v24.6.1.1-testing) +SET(VERSION_STRING 24.6.1.1) # end of autochange diff --git a/contrib/arrow b/contrib/arrow index 8f36d71d185..5cfccd8ea65 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 8f36d71d18587f1f315ec832f424183cb6519cbb +Subproject commit 5cfccd8ea65f33d4517e7409815d761c7650b45d diff --git a/contrib/aws b/contrib/aws index 2e12d7c6daf..eb96e740453 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit 2e12d7c6dafa81311ee3d73ac6a178550ffa75be +Subproject commit eb96e740453ae27afa1f367ba19f99bdcb38484d diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 5d53d03606f..172fbce6406 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -11,6 +11,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ aspell \ curl \ git \ + gh \ file \ libxml2-utils \ moreutils \ diff --git a/docs/_description_templates/template-setting.md b/docs/_description_templates/template-setting.md index fc912aba3e1..f4525d872df 100644 --- a/docs/_description_templates/template-setting.md +++ b/docs/_description_templates/template-setting.md @@ -2,7 +2,7 @@ Description. -For the switch setting, use the typical phrase: “Enables or disables something …”. +For the switch setting, use the typical phrase: “Enables or disables something ...”. Possible values: diff --git a/docs/changelogs/v20.7.1.4310-prestable.md b/docs/changelogs/v20.7.1.4310-prestable.md index f47c7334228..aa1d993b263 100644 --- a/docs/changelogs/v20.7.1.4310-prestable.md +++ b/docs/changelogs/v20.7.1.4310-prestable.md @@ -166,4 +166,4 @@ * NO CL ENTRY: 'Revert "Abort on std::out_of_range in debug builds"'. [#12752](https://github.com/ClickHouse/ClickHouse/pull/12752) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * NO CL ENTRY: 'Bump protobuf from 3.12.2 to 3.12.4 in /docs/tools'. [#13102](https://github.com/ClickHouse/ClickHouse/pull/13102) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). * NO CL ENTRY: 'Merge [#12574](https://github.com/ClickHouse/ClickHouse/issues/12574)'. [#13158](https://github.com/ClickHouse/ClickHouse/pull/13158) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* NO CL ENTRY: 'Revert "Add QueryTimeMicroseconds, SelectQueryTimeMicroseconds and InsertQuer…"'. [#13303](https://github.com/ClickHouse/ClickHouse/pull/13303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add QueryTimeMicroseconds, SelectQueryTimeMicroseconds and InsertQuer..."'. [#13303](https://github.com/ClickHouse/ClickHouse/pull/13303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). diff --git a/docs/changelogs/v21.12.1.9017-prestable.md b/docs/changelogs/v21.12.1.9017-prestable.md index 88b8260e312..bd84873e67a 100644 --- a/docs/changelogs/v21.12.1.9017-prestable.md +++ b/docs/changelogs/v21.12.1.9017-prestable.md @@ -421,5 +421,5 @@ sidebar_label: 2022 * Fix possible crash in DataTypeAggregateFunction [#32287](https://github.com/ClickHouse/ClickHouse/pull/32287) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
* Update backport.py [#32323](https://github.com/ClickHouse/ClickHouse/pull/32323) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix graphite-bench build [#32351](https://github.com/ClickHouse/ClickHouse/pull/32351) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Revert "graphite: split tagged/plain rollup rules (for merges perfoma… [#32376](https://github.com/ClickHouse/ClickHouse/pull/32376) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "graphite: split tagged/plain rollup rules (for merges perfoma... [#32376](https://github.com/ClickHouse/ClickHouse/pull/32376) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Another attempt to fix unit test Executor::RemoveTasksStress [#32390](https://github.com/ClickHouse/ClickHouse/pull/32390) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). diff --git a/docs/changelogs/v21.3.3.14-lts.md b/docs/changelogs/v21.3.3.14-lts.md index 57bde602f21..91d99deaa6b 100644 --- a/docs/changelogs/v21.3.3.14-lts.md +++ b/docs/changelogs/v21.3.3.14-lts.md @@ -18,4 +18,4 @@ sidebar_label: 2022 #### NOT FOR CHANGELOG / INSIGNIFICANT -* fix incorrect number of rows for Chunks with no columns in PartialSor… [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* fix incorrect number of rows for Chunks with no columns in PartialSor... [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). diff --git a/docs/changelogs/v21.4.1.6422-prestable.md b/docs/changelogs/v21.4.1.6422-prestable.md index 2eadb0d4754..66937c3be15 100644 --- a/docs/changelogs/v21.4.1.6422-prestable.md +++ b/docs/changelogs/v21.4.1.6422-prestable.md @@ -223,7 +223,7 @@ sidebar_label: 2022 * Do not overlap zookeeper path for ReplicatedMergeTree in stateless *.sh tests [#21724](https://github.com/ClickHouse/ClickHouse/pull/21724) ([Azat Khuzhin](https://github.com/azat)). * make the fuzzer use sources from the CI [#21754](https://github.com/ClickHouse/ClickHouse/pull/21754) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Add one more variant to memcpy benchmark [#21759](https://github.com/ClickHouse/ClickHouse/pull/21759) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* fix incorrect number of rows for Chunks with no columns in PartialSor… [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* fix incorrect number of rows for Chunks with no columns in PartialSor... [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). * docs(fix): typo [#21775](https://github.com/ClickHouse/ClickHouse/pull/21775) ([Ali Demirci](https://github.com/depyronick)). * DDLWorker.cpp: fixed exceeded amount of tries typo [#21807](https://github.com/ClickHouse/ClickHouse/pull/21807) ([Eldar Nasyrov](https://github.com/3ldar-nasyrov)). * fix integration MaterializeMySQL test [#21819](https://github.com/ClickHouse/ClickHouse/pull/21819) ([TCeason](https://github.com/TCeason)). diff --git a/docs/changelogs/v21.4.2.10-prestable.md b/docs/changelogs/v21.4.2.10-prestable.md index 3db17ddfcf3..b9bdbd80c0c 100644 --- a/docs/changelogs/v21.4.2.10-prestable.md +++ b/docs/changelogs/v21.4.2.10-prestable.md @@ -226,7 +226,7 @@ sidebar_label: 2022 * Do not overlap zookeeper path for ReplicatedMergeTree in stateless *.sh tests [#21724](https://github.com/ClickHouse/ClickHouse/pull/21724) ([Azat Khuzhin](https://github.com/azat)). 
* make the fuzzer use sources from the CI [#21754](https://github.com/ClickHouse/ClickHouse/pull/21754) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Add one more variant to memcpy benchmark [#21759](https://github.com/ClickHouse/ClickHouse/pull/21759) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* fix incorrect number of rows for Chunks with no columns in PartialSor… [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* fix incorrect number of rows for Chunks with no columns in PartialSor... [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). * docs(fix): typo [#21775](https://github.com/ClickHouse/ClickHouse/pull/21775) ([Ali Demirci](https://github.com/depyronick)). * DDLWorker.cpp: fixed exceeded amount of tries typo [#21807](https://github.com/ClickHouse/ClickHouse/pull/21807) ([Eldar Nasyrov](https://github.com/3ldar-nasyrov)). * fix integration MaterializeMySQL test [#21819](https://github.com/ClickHouse/ClickHouse/pull/21819) ([TCeason](https://github.com/TCeason)). diff --git a/docs/changelogs/v22.6.1.1985-stable.md b/docs/changelogs/v22.6.1.1985-stable.md index c915d24fe00..7bd7038377a 100644 --- a/docs/changelogs/v22.6.1.1985-stable.md +++ b/docs/changelogs/v22.6.1.1985-stable.md @@ -160,7 +160,7 @@ sidebar_label: 2022 * fix toString error on DatatypeDate32. [#37775](https://github.com/ClickHouse/ClickHouse/pull/37775) ([LiuNeng](https://github.com/liuneng1994)). * The clickhouse-keeper setting `dead_session_check_period_ms` was transformed into microseconds (multiplied by 1000), which lead to dead sessions only being cleaned up after several minutes (instead of 500ms). [#37824](https://github.com/ClickHouse/ClickHouse/pull/37824) ([Michael Lex](https://github.com/mlex)). * Fix possible "No more packets are available" for distributed queries (in case of `async_socket_for_remote`/`use_hedged_requests` is disabled). [#37826](https://github.com/ClickHouse/ClickHouse/pull/37826) ([Azat Khuzhin](https://github.com/azat)). -* Do not drop the inner target table when executing `ALTER TABLE … MODIFY QUERY` in WindowView. [#37879](https://github.com/ClickHouse/ClickHouse/pull/37879) ([vxider](https://github.com/Vxider)). +* Do not drop the inner target table when executing `ALTER TABLE ... MODIFY QUERY` in WindowView. [#37879](https://github.com/ClickHouse/ClickHouse/pull/37879) ([vxider](https://github.com/Vxider)). * Fix directory ownership of coordination dir in clickhouse-keeper Docker image. Fixes [#37914](https://github.com/ClickHouse/ClickHouse/issues/37914). [#37915](https://github.com/ClickHouse/ClickHouse/pull/37915) ([James Maidment](https://github.com/jamesmaidment)). * Dictionaries fix custom query with update field and `{condition}`. Closes [#33746](https://github.com/ClickHouse/ClickHouse/issues/33746). [#37947](https://github.com/ClickHouse/ClickHouse/pull/37947) ([Maksim Kita](https://github.com/kitaisreal)). * Fix possible incorrect result of `SELECT ... WITH FILL` in the case when `ORDER BY` should be applied after `WITH FILL` result (e.g. for outer query). Incorrect result was caused by optimization for `ORDER BY` expressions ([#35623](https://github.com/ClickHouse/ClickHouse/issues/35623)). Closes [#37904](https://github.com/ClickHouse/ClickHouse/issues/37904). [#37959](https://github.com/ClickHouse/ClickHouse/pull/37959) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). 
@@ -180,7 +180,7 @@ sidebar_label: 2022 #### NO CL ENTRY * NO CL ENTRY: 'Revert "Fix mutations in tables with columns of type `Object`"'. [#37355](https://github.com/ClickHouse/ClickHouse/pull/37355) ([Alexander Tokmakov](https://github.com/tavplubix)). -* NO CL ENTRY: 'Revert "Remove height restrictions from the query div in play web tool, and m…"'. [#37501](https://github.com/ClickHouse/ClickHouse/pull/37501) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Remove height restrictions from the query div in play web tool, and m..."'. [#37501](https://github.com/ClickHouse/ClickHouse/pull/37501) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * NO CL ENTRY: 'Revert "Add support for preprocessing ZooKeeper operations in `clickhouse-keeper`"'. [#37534](https://github.com/ClickHouse/ClickHouse/pull/37534) ([Antonio Andelic](https://github.com/antonio2368)). * NO CL ENTRY: 'Revert "(only with zero-copy replication, non-production experimental feature not recommended to use) fix possible deadlock during fetching part"'. [#37545](https://github.com/ClickHouse/ClickHouse/pull/37545) ([Alexander Tokmakov](https://github.com/tavplubix)). * NO CL ENTRY: 'Revert "RFC: Fix converting types for UNION queries (may produce LOGICAL_ERROR)"'. [#37582](https://github.com/ClickHouse/ClickHouse/pull/37582) ([Dmitry Novik](https://github.com/novikd)). diff --git a/docs/changelogs/v22.7.1.2484-stable.md b/docs/changelogs/v22.7.1.2484-stable.md index 7464b0449ee..c4a76c66e0c 100644 --- a/docs/changelogs/v22.7.1.2484-stable.md +++ b/docs/changelogs/v22.7.1.2484-stable.md @@ -410,7 +410,7 @@ sidebar_label: 2022 * Add test for [#39132](https://github.com/ClickHouse/ClickHouse/issues/39132) [#39173](https://github.com/ClickHouse/ClickHouse/pull/39173) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Suppression for BC check (`Cannot parse string 'Hello' as UInt64`) [#39176](https://github.com/ClickHouse/ClickHouse/pull/39176) ([Alexander Tokmakov](https://github.com/tavplubix)). * Fix 01961_roaring_memory_tracking test [#39187](https://github.com/ClickHouse/ClickHouse/pull/39187) ([Dmitry Novik](https://github.com/novikd)). -* Cleanup: done during [#38719](https://github.com/ClickHouse/ClickHouse/issues/38719) (SortingStep: deduce way to sort based on … [#39191](https://github.com/ClickHouse/ClickHouse/pull/39191) ([Igor Nikonov](https://github.com/devcrafter)). +* Cleanup: done during [#38719](https://github.com/ClickHouse/ClickHouse/issues/38719) (SortingStep: deduce way to sort based on ... [#39191](https://github.com/ClickHouse/ClickHouse/pull/39191) ([Igor Nikonov](https://github.com/devcrafter)). * Fix exception in AsynchronousMetrics for s390x [#39193](https://github.com/ClickHouse/ClickHouse/pull/39193) ([Harry Lee](https://github.com/HarryLeeIBM)). * Optimize accesses to system.stack_trace (filter by name before sending signal) [#39212](https://github.com/ClickHouse/ClickHouse/pull/39212) ([Azat Khuzhin](https://github.com/azat)). * Enable warning "-Wdeprecated-dynamic-exception-spec" [#39213](https://github.com/ClickHouse/ClickHouse/pull/39213) ([Robert Schulze](https://github.com/rschu1ze)). diff --git a/docs/changelogs/v22.8.13.20-lts.md b/docs/changelogs/v22.8.13.20-lts.md index 0734f40bf3e..ad44fbfc5d6 100644 --- a/docs/changelogs/v22.8.13.20-lts.md +++ b/docs/changelogs/v22.8.13.20-lts.md @@ -20,4 +20,4 @@ sidebar_label: 2023 * Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). * Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Merge pull request [#38262](https://github.com/ClickHouse/ClickHouse/issues/38262) from PolyProgrammist/fix-ordinary-system-un… [#45650](https://github.com/ClickHouse/ClickHouse/pull/45650) ([alesapin](https://github.com/alesapin)). +* Merge pull request [#38262](https://github.com/ClickHouse/ClickHouse/issues/38262) from PolyProgrammist/fix-ordinary-system-un... [#45650](https://github.com/ClickHouse/ClickHouse/pull/45650) ([alesapin](https://github.com/alesapin)). diff --git a/docs/changelogs/v23.11.1.2711-stable.md b/docs/changelogs/v23.11.1.2711-stable.md index e32dee41dc7..0bdee08f5c9 100644 --- a/docs/changelogs/v23.11.1.2711-stable.md +++ b/docs/changelogs/v23.11.1.2711-stable.md @@ -217,7 +217,7 @@ sidebar_label: 2023 * S3Queue minor fix [#56999](https://github.com/ClickHouse/ClickHouse/pull/56999) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix file path validation for DatabaseFileSystem [#57029](https://github.com/ClickHouse/ClickHouse/pull/57029) ([San](https://github.com/santrancisco)). * Fix `fuzzBits` with `ARRAY JOIN` [#57033](https://github.com/ClickHouse/ClickHouse/pull/57033) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix Nullptr dereference in partial merge join with joined_subquery_re… [#57048](https://github.com/ClickHouse/ClickHouse/pull/57048) ([vdimir](https://github.com/vdimir)). +* Fix Nullptr dereference in partial merge join with joined_subquery_re... [#57048](https://github.com/ClickHouse/ClickHouse/pull/57048) ([vdimir](https://github.com/vdimir)). * Fix race condition in RemoteSource [#57052](https://github.com/ClickHouse/ClickHouse/pull/57052) ([Raúl Marín](https://github.com/Algunenano)). * Implement `bitHammingDistance` for big integers [#57073](https://github.com/ClickHouse/ClickHouse/pull/57073) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * S3-style links bug fix [#57075](https://github.com/ClickHouse/ClickHouse/pull/57075) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). diff --git a/docs/changelogs/v23.12.1.1368-stable.md b/docs/changelogs/v23.12.1.1368-stable.md index 1a322ae9c0f..cb8ba57100e 100644 --- a/docs/changelogs/v23.12.1.1368-stable.md +++ b/docs/changelogs/v23.12.1.1368-stable.md @@ -272,7 +272,7 @@ sidebar_label: 2023 * Bump Azure to v1.6.0 [#58052](https://github.com/ClickHouse/ClickHouse/pull/58052) ([Robert Schulze](https://github.com/rschu1ze)). * Correct values for randomization [#58058](https://github.com/ClickHouse/ClickHouse/pull/58058) ([Anton Popov](https://github.com/CurtizJ)). * Non post request should be readonly [#58060](https://github.com/ClickHouse/ClickHouse/pull/58060) ([San](https://github.com/santrancisco)). -* Revert "Merge pull request [#55710](https://github.com/ClickHouse/ClickHouse/issues/55710) from guoxiaolongzte/clickhouse-test… [#58066](https://github.com/ClickHouse/ClickHouse/pull/58066) ([Raúl Marín](https://github.com/Algunenano)). +* Revert "Merge pull request [#55710](https://github.com/ClickHouse/ClickHouse/issues/55710) from guoxiaolongzte/clickhouse-test... [#58066](https://github.com/ClickHouse/ClickHouse/pull/58066) ([Raúl Marín](https://github.com/Algunenano)). 
* fix typo in the test 02479 [#58072](https://github.com/ClickHouse/ClickHouse/pull/58072) ([Sema Checherinda](https://github.com/CheSema)). * Bump Azure to 1.7.2 [#58075](https://github.com/ClickHouse/ClickHouse/pull/58075) ([Robert Schulze](https://github.com/rschu1ze)). * Fix flaky test `02567_and_consistency` [#58076](https://github.com/ClickHouse/ClickHouse/pull/58076) ([Anton Popov](https://github.com/CurtizJ)). diff --git a/docs/changelogs/v23.3.1.2823-lts.md b/docs/changelogs/v23.3.1.2823-lts.md index 0c9be3601da..f81aba53ebe 100644 --- a/docs/changelogs/v23.3.1.2823-lts.md +++ b/docs/changelogs/v23.3.1.2823-lts.md @@ -520,7 +520,7 @@ sidebar_label: 2023 * Improve script for updating clickhouse-docs [#48135](https://github.com/ClickHouse/ClickHouse/pull/48135) ([Alexander Tokmakov](https://github.com/tavplubix)). * Fix stdlib compatibility issues [#48150](https://github.com/ClickHouse/ClickHouse/pull/48150) ([DimasKovas](https://github.com/DimasKovas)). * Make test test_disallow_concurrency less flaky [#48152](https://github.com/ClickHouse/ClickHouse/pull/48152) ([Vitaly Baranov](https://github.com/vitlibar)). -* Remove unused mockSystemDatabase from gtest_transform_query_for_exter… [#48162](https://github.com/ClickHouse/ClickHouse/pull/48162) ([Vladimir C](https://github.com/vdimir)). +* Remove unused mockSystemDatabase from gtest_transform_query_for_exter... [#48162](https://github.com/ClickHouse/ClickHouse/pull/48162) ([Vladimir C](https://github.com/vdimir)). * Update environmental-sensors.md [#48166](https://github.com/ClickHouse/ClickHouse/pull/48166) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Correctly handle NULL constants in logical optimizer for new analyzer [#48168](https://github.com/ClickHouse/ClickHouse/pull/48168) ([Antonio Andelic](https://github.com/antonio2368)). * Try making KeeperMap test more stable [#48170](https://github.com/ClickHouse/ClickHouse/pull/48170) ([Antonio Andelic](https://github.com/antonio2368)). diff --git a/docs/changelogs/v23.5.1.3174-stable.md b/docs/changelogs/v23.5.1.3174-stable.md index 2212eb6e893..4bdd4139afc 100644 --- a/docs/changelogs/v23.5.1.3174-stable.md +++ b/docs/changelogs/v23.5.1.3174-stable.md @@ -474,7 +474,7 @@ sidebar_label: 2023 * Fix flakiness of test_distributed_load_balancing test [#49921](https://github.com/ClickHouse/ClickHouse/pull/49921) ([Azat Khuzhin](https://github.com/azat)). * Add some logging [#49925](https://github.com/ClickHouse/ClickHouse/pull/49925) ([Kseniia Sumarokova](https://github.com/kssenii)). * Support hardlinking parts transactionally [#49931](https://github.com/ClickHouse/ClickHouse/pull/49931) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix for analyzer: 02377_ optimize_sorting_by_input_stream_properties_e… [#49943](https://github.com/ClickHouse/ClickHouse/pull/49943) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix for analyzer: 02377_ optimize_sorting_by_input_stream_properties_e... [#49943](https://github.com/ClickHouse/ClickHouse/pull/49943) ([Igor Nikonov](https://github.com/devcrafter)). * Follow up to [#49429](https://github.com/ClickHouse/ClickHouse/issues/49429) [#49964](https://github.com/ClickHouse/ClickHouse/pull/49964) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix flaky test_ssl_cert_authentication to use urllib3 [#49982](https://github.com/ClickHouse/ClickHouse/pull/49982) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). 
* Fix woboq codebrowser build with -Wno-poison-system-directories [#49992](https://github.com/ClickHouse/ClickHouse/pull/49992) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v23.8.1.2992-lts.md b/docs/changelogs/v23.8.1.2992-lts.md index 7c224b19350..05385d9c52b 100644 --- a/docs/changelogs/v23.8.1.2992-lts.md +++ b/docs/changelogs/v23.8.1.2992-lts.md @@ -272,7 +272,7 @@ sidebar_label: 2023 * Add more checks into ThreadStatus ctor. [#42019](https://github.com/ClickHouse/ClickHouse/pull/42019) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Refactor Query Tree visitor [#46740](https://github.com/ClickHouse/ClickHouse/pull/46740) ([Dmitry Novik](https://github.com/novikd)). * Revert "Revert "Randomize JIT settings in tests"" [#48282](https://github.com/ClickHouse/ClickHouse/pull/48282) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix outdated cache configuration in s3 tests: s3_storage_policy_by_defau… [#48424](https://github.com/ClickHouse/ClickHouse/pull/48424) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix outdated cache configuration in s3 tests: s3_storage_policy_by_defau... [#48424](https://github.com/ClickHouse/ClickHouse/pull/48424) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix IN with decimal in analyzer [#48754](https://github.com/ClickHouse/ClickHouse/pull/48754) ([vdimir](https://github.com/vdimir)). * Some unclear change in StorageBuffer::reschedule() for something [#49723](https://github.com/ClickHouse/ClickHouse/pull/49723) ([DimasKovas](https://github.com/DimasKovas)). * MergeTree & SipHash checksum big-endian support [#50276](https://github.com/ClickHouse/ClickHouse/pull/50276) ([ltrk2](https://github.com/ltrk2)). diff --git a/docs/changelogs/v24.1.3.31-stable.md b/docs/changelogs/v24.1.3.31-stable.md index 046ca451fbc..e898fba5c87 100644 --- a/docs/changelogs/v24.1.3.31-stable.md +++ b/docs/changelogs/v24.1.3.31-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix `ASTAlterCommand::formatImpl` in case of column specific settings... [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). * Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)). * Fix corner case when passing `update_insert_deduplication_token_in_dependent_materialized_views` [#59544](https://github.com/ClickHouse/ClickHouse/pull/59544) ([Jordi Villar](https://github.com/jrdi)). * Fix incorrect result of arrayElement / map[] on empty value [#59594](https://github.com/ClickHouse/ClickHouse/pull/59594) ([Raúl Marín](https://github.com/Algunenano)). diff --git a/docs/changelogs/v24.2.1.2248-stable.md b/docs/changelogs/v24.2.1.2248-stable.md index 6113dd51ab1..02affe12c43 100644 --- a/docs/changelogs/v24.2.1.2248-stable.md +++ b/docs/changelogs/v24.2.1.2248-stable.md @@ -130,7 +130,7 @@ sidebar_label: 2024 * Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). 
* Fix digest calculation in Keeper [#59439](https://github.com/ClickHouse/ClickHouse/pull/59439) ([Antonio Andelic](https://github.com/antonio2368)). * Fix stacktraces for binaries without debug symbols [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)). -* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix `ASTAlterCommand::formatImpl` in case of column specific settings... [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). * Fix `SELECT * FROM [...] ORDER BY ALL` with Analyzer [#59462](https://github.com/ClickHouse/ClickHouse/pull/59462) ([zhongyuankai](https://github.com/zhongyuankai)). * Fix possible uncaught exception during distributed query cancellation [#59487](https://github.com/ClickHouse/ClickHouse/pull/59487) ([Azat Khuzhin](https://github.com/azat)). * Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)). diff --git a/docs/changelogs/v24.3.1.2672-lts.md b/docs/changelogs/v24.3.1.2672-lts.md index e5d008680a8..006ab941203 100644 --- a/docs/changelogs/v24.3.1.2672-lts.md +++ b/docs/changelogs/v24.3.1.2672-lts.md @@ -526,7 +526,7 @@ sidebar_label: 2024 * No "please" [#61916](https://github.com/ClickHouse/ClickHouse/pull/61916) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Update version_date.tsv and changelogs after v23.12.6.19-stable [#61917](https://github.com/ClickHouse/ClickHouse/pull/61917) ([robot-clickhouse](https://github.com/robot-clickhouse)). * Update version_date.tsv and changelogs after v24.1.8.22-stable [#61918](https://github.com/ClickHouse/ClickHouse/pull/61918) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Fix flaky test_broken_projestions/test.py::test_broken_ignored_replic… [#61932](https://github.com/ClickHouse/ClickHouse/pull/61932) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix flaky test_broken_projestions/test.py::test_broken_ignored_replic... [#61932](https://github.com/ClickHouse/ClickHouse/pull/61932) ([Kseniia Sumarokova](https://github.com/kssenii)). * Check is Rust avaiable for build, if not, suggest a way to disable Rust support [#61938](https://github.com/ClickHouse/ClickHouse/pull/61938) ([Azat Khuzhin](https://github.com/azat)). * CI: new ci menu in PR body [#61948](https://github.com/ClickHouse/ClickHouse/pull/61948) ([Max K.](https://github.com/maxknv)). * Remove flaky test `01193_metadata_loading` [#61961](https://github.com/ClickHouse/ClickHouse/pull/61961) ([Nikita Taranov](https://github.com/nickitat)). diff --git a/docs/en/development/style.md b/docs/en/development/style.md index d201bbb0d3c..0f097d27607 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -57,7 +57,7 @@ memcpy(&buf[place_value], &x, sizeof(x)); for (size_t i = 0; i < rows; i += storage.index_granularity) ``` -**7.** Add spaces around binary operators (`+`, `-`, `*`, `/`, `%`, …) and the ternary operator `?:`. +**7.** Add spaces around binary operators (`+`, `-`, `*`, `/`, `%`, ...) and the ternary operator `?:`. 
``` cpp UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -86,7 +86,7 @@ dst.ClickGoodEvent = click.GoodEvent; If necessary, the operator can be wrapped to the next line. In this case, the offset in front of it is increased. -**11.** Do not use a space to separate unary operators (`--`, `++`, `*`, `&`, …) from the argument. +**11.** Do not use a space to separate unary operators (`--`, `++`, `*`, `&`, ...) from the argument. **12.** Put a space after a comma, but not before it. The same rule goes for a semicolon inside a `for` expression. @@ -115,7 +115,7 @@ public: **16.** If the same `namespace` is used for the entire file, and there isn’t anything else significant, an offset is not necessary inside `namespace`. -**17.** If the block for an `if`, `for`, `while`, or other expression consists of a single `statement`, the curly brackets are optional. Place the `statement` on a separate line, instead. This rule is also valid for nested `if`, `for`, `while`, … +**17.** If the block for an `if`, `for`, `while`, or other expression consists of a single `statement`, the curly brackets are optional. Place the `statement` on a separate line, instead. This rule is also valid for nested `if`, `for`, `while`, ... But if the inner `statement` contains curly brackets or `else`, the external block should be written in curly brackets. diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index dbd1c270a4a..2749fa7e479 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -118,7 +118,7 @@ If the listing of files contains number ranges with leading zeros, use the const **Example** -Create table with files named `file000`, `file001`, … , `file999`: +Create table with files named `file000`, `file001`, ... , `file999`: ``` sql CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV') diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index dfa06801d04..cb1da1c8e68 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -178,7 +178,7 @@ If the listing of files contains number ranges with leading zeros, use the const **Example with wildcards 1** -Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Create table with files named `file-000.csv`, `file-001.csv`, ... 
, `file-999.csv`: ``` sql CREATE TABLE big_table (name String, value UInt32) diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index 8ebab80423f..aa7fa512480 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -202,8 +202,7 @@ Example: CREATE TABLE s3queue_engine_table (name String, value UInt32) ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip') SETTINGS - mode = 'unordered', - keeper_path = '/clickhouse/s3queue/'; + mode = 'unordered'; CREATE TABLE stats (name String, value UInt32) ENGINE = MergeTree() ORDER BY name; diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index 23d98d4b20e..eda87fd06c1 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -71,7 +71,7 @@ WHERE table = 'visits' └───────────┴───────────────────┴────────┘ ``` -The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md) queries. +The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER ... PARTITION](../../../sql-reference/statements/alter/partition.md) queries. The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition) query. diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 7862eef69f8..a009c4a32f3 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -954,7 +954,7 @@ In the case of `MergeTree` tables, data is getting to disk in different ways: - As a result of an insert (`INSERT` query). - During background merges and [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). - When downloading from another replica. -- As a result of partition freezing [ALTER TABLE … FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition). +- As a result of partition freezing [ALTER TABLE ... FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition). In all these cases except for mutations and partition freezing, a part is stored on a volume and a disk according to the given storage policy: @@ -966,7 +966,7 @@ Under the hood, mutations and partition freezing make use of [hard links](https: In the background, parts are moved between volumes on the basis of the amount of free space (`move_factor` parameter) according to the order the volumes are declared in the configuration file. Data is never transferred from the last one and into the first one. 
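For illustration only, a minimal storage policy of the kind described above might look like the following sketch. The disk names `fast_ssd` and `big_hdd`, the policy name `moving_policy`, and the paths are hypothetical and not taken from the documentation being changed here; with such a policy, inserts land on the `hot` volume and parts are moved in the background to `cold` once free space on `hot` falls below the `move_factor` share:

``` xml
<clickhouse>
    <storage_configuration>
        <disks>
            <!-- hypothetical disks; the paths must exist on the server -->
            <fast_ssd>
                <path>/mnt/fast_ssd/clickhouse/</path>
            </fast_ssd>
            <big_hdd>
                <path>/mnt/big_hdd/clickhouse/</path>
            </big_hdd>
        </disks>
        <policies>
            <moving_policy>
                <volumes>
                    <!-- volumes are listed in priority order: new parts go to `hot` first -->
                    <hot>
                        <disk>fast_ssd</disk>
                    </hot>
                    <cold>
                        <disk>big_hdd</disk>
                    </cold>
                </volumes>
                <!-- start moving parts away from a volume when less than 20% of its space is free -->
                <move_factor>0.2</move_factor>
            </moving_policy>
        </policies>
    </storage_configuration>
</clickhouse>
```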
One may use system tables [system.part_log](/docs/en/operations/system-tables/part_log.md/#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](/docs/en/operations/system-tables/parts.md/#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs. -User can force moving a part or a partition from one volume to another using the query [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](/docs/en/sql-reference/statements/alter/partition.md/#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met. +User can force moving a part or a partition from one volume to another using the query [ALTER TABLE ... MOVE PART\|PARTITION ... TO VOLUME\|DISK ...](/docs/en/sql-reference/statements/alter/partition.md/#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met. Moving data does not interfere with data replication. Therefore, different storage policies can be specified for the same table on different replicas. diff --git a/docs/en/engines/table-engines/special/external-data.md b/docs/en/engines/table-engines/special/external-data.md index 7ea3f3e30d6..f6d6dae7eb6 100644 --- a/docs/en/engines/table-engines/special/external-data.md +++ b/docs/en/engines/table-engines/special/external-data.md @@ -29,7 +29,7 @@ Only a single table can be retrieved from stdin. The following parameters are optional: **–name**– Name of the table. If omitted, _data is used. **–format** – Data format in the file. If omitted, TabSeparated is used. -One of the following parameters is required:**–types** – A list of comma-separated column types. For example: `UInt64,String`. The columns will be named _1, _2, … +One of the following parameters is required:**–types** – A list of comma-separated column types. For example: `UInt64,String`. The columns will be named _1, _2, ... **–structure**– The table structure in the format`UserID UInt64`, `URL String`. Defines the column names and types. The files specified in ‘file’ will be parsed by the format specified in ‘format’, using the data types specified in ‘types’ or ‘structure’. The table will be uploaded to the server and accessible there as a temporary table with the name in ‘name’. diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index fdf5242ba3b..0d422f64762 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -14,6 +14,10 @@ Usage scenarios: - Convert data from one format to another. - Updating data in ClickHouse via editing a file on a disk. +:::note +This engine is not currently available in ClickHouse Cloud, please [use the S3 table function instead](/docs/en/sql-reference/table-functions/s3.md). 
+::: + +## Usage in ClickHouse Server {#usage-in-clickhouse-server} ``` sql diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index a137eb2bdf2..66d5bd2e574 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -197,6 +197,7 @@ SELECT * FROM nestedt FORMAT TSV - [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_enum_as_number) - treat inserted enum values in TSV formats as enum indices. Default value - `false`. - [input_format_tsv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. - [output_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV output format will be `\r\n` instead of `\n`. Default value - `false`. +- [input_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV input format will be `\r\n` instead of `\n`. Default value - `false`. - [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. - [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`. - [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 28831404a1f..a5fe74fd0c6 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -561,6 +561,25 @@ Default value: 5000 400 ``` +## max\_view\_num\_to\_warn {#max-view-num-to-warn} +If the number of attached views exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. +Default value: 10000 + +**Example** + +``` xml +<max_view_num_to_warn>400</max_view_num_to_warn> +``` + +## max\_dictionary\_num\_to\_warn {#max-dictionary-num-to-warn} +If the number of attached dictionaries exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. +Default value: 1000 + +**Example** + +``` xml +<max_dictionary_num_to_warn>400</max_dictionary_num_to_warn> +``` ## max\_part\_num\_to\_warn {#max-part-num-to-warn} If the number of active parts exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index d86f18ff982..2a20e74e20f 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -303,7 +303,7 @@ What to do when the amount of data exceeds one of the limits: ‘throw’ or ‘ Limits the number of rows in the hash table that is used when joining tables.
-This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. +This setting applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. If a query contains multiple joins, ClickHouse checks this setting for every intermediate result. @@ -320,7 +320,7 @@ Default value: 0. Limits the size in bytes of the hash table used when joining tables. -This setting applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). +This setting applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). If the query contains joins, ClickHouse checks this setting for every intermediate result. diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 6666f68c177..1a27b350652 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -831,7 +831,13 @@ Default value: `0`. ### output_format_tsv_crlf_end_of_line {#output_format_tsv_crlf_end_of_line} -Use DOC/Windows-style line separator (CRLF) in TSV instead of Unix style (LF). +Use DOS/Windows-style line separator (CRLF) in TSV instead of Unix style (LF). + +Disabled by default. + +### input_format_tsv_crlf_end_of_line {#input_format_tsv_crlf_end_of_line} + +Use DOS/Windows-style line separator (CRLF) for TSV input files instead of Unix style (LF). Disabled by default. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 91b544c6a82..252b041ef6f 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2248,7 +2248,7 @@ Default value: 0. ## count_distinct_implementation {#count_distinct_implementation} -Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) construction. +Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT ...)](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) construction. Possible values: @@ -3665,6 +3665,26 @@ Possible values: Default value: `0`. +## s3_ignore_file_doesnt_exist {#s3_ignore_file_doesnt_exist} + +Ignore the absence of a file if it does not exist when reading certain keys. + +Possible values: +- 1 — `SELECT` returns empty result. +- 0 — `SELECT` throws an exception. + +Default value: `0`. + +## s3_validate_request_settings {#s3_validate_request_settings} + +Enables validation of S3 request settings. + +Possible values: +- 1 — validate settings. +- 0 — do not validate settings. + +Default value: `1`. + ## hdfs_truncate_on_insert {#hdfs_truncate_on_insert} Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists. @@ -3697,6 +3717,56 @@ Possible values: Default value: `0`. +## hdfs_throw_on_zero_files_match {#hdfs_throw_on_zero_files_match} + +Throw an error if zero files were matched according to glob expansion rules. + +Possible values: +- 1 — `SELECT` throws an exception. +- 0 — `SELECT` returns empty result. + +Default value: `0`. + +## hdfs_ignore_file_doesnt_exist {#hdfs_ignore_file_doesnt_exist} + +Ignore the absence of a file if it does not exist when reading certain keys. + +Possible values: +- 1 — `SELECT` returns empty result. +- 0 — `SELECT` throws an exception. + +Default value: `0`. + +## azure_throw_on_zero_files_match {#azure_throw_on_zero_files_match} + +Throw an error if zero files were matched according to glob expansion rules. + +Possible values: +- 1 — `SELECT` throws an exception. +- 0 — `SELECT` returns empty result. + +Default value: `0`. + +## azure_ignore_file_doesnt_exist {#azure_ignore_file_doesnt_exist} + +Ignore the absence of a file if it does not exist when reading certain keys. + +Possible values: +- 1 — `SELECT` returns empty result. +- 0 — `SELECT` throws an exception. + +Default value: `0`. + +## azure_skip_empty_files {#azure_skip_empty_files} + +Enables or disables skipping empty files in the Azure engine. + +Possible values: +- 0 — `SELECT` throws an exception if empty file is not compatible with requested format. +- 1 — `SELECT` returns empty result for empty file. + +Default value: `0`. + ## engine_url_skip_empty_files {#engine_url_skip_empty_files} Enables or disables skipping empty files in [URL](../../engines/table-engines/special/url.md) engine tables. @@ -5468,3 +5538,15 @@ Defines how MySQL types are converted to corresponding ClickHouse types. A comma - `datetime64`: convert `DATETIME` and `TIMESTAMP` types to `DateTime64` instead of `DateTime` when precision is not `0`. - `date2Date32`: convert `DATE` to `Date32` instead of `Date`. Takes precedence over `date2String`. - `date2String`: convert `DATE` to `String` instead of `Date`. Overridden by `datetime64`. + +## cross_join_min_rows_to_compress + +Minimal count of rows to compress a block in CROSS JOIN. A value of zero disables this threshold. A block is compressed when either of the two thresholds (by rows or by bytes) is reached. + +Default value: `10000000`. + +## cross_join_min_bytes_to_compress + +Minimal size of a block to compress in CROSS JOIN. A value of zero disables this threshold. A block is compressed when either of the two thresholds (by rows or by bytes) is reached. + +Default value: `1GiB`. diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 9b316960750..53ecd66396d 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -421,6 +421,7 @@ Other parameters: * `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`. * `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). * `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). +* `metadata_keep_free_space_bytes` - the amount of free metadata disk space to be reserved. Examples of working configurations can be found in integration tests directory (see e.g.
[test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)). diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 8981ac1f752..1dc89b8dcf9 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -82,7 +82,7 @@ FROM In this case, you should remember that you do not know the histogram bin borders. -## sequenceMatch(pattern)(timestamp, cond1, cond2, …) +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) Checks whether the sequence contains an event chain that matches the pattern. @@ -172,7 +172,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM - [sequenceCount](#function-sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, …) +## sequenceCount(pattern)(time, cond1, cond2, ...) Counts the number of event chains that matched the pattern. The function searches event chains that do not overlap. It starts to search for the next chain after the current chain is matched. diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index e2a5bc53e32..856d447ac13 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -7,7 +7,7 @@ sidebar_position: 201 ## quantiles -Syntax: `quantiles(level1, level2, …)(x)` +Syntax: `quantiles(level1, level2, ...)(x)` All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDD`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md index 87511a505dc..37f0d0e50ae 100644 --- a/docs/en/sql-reference/data-types/aggregatefunction.md +++ b/docs/en/sql-reference/data-types/aggregatefunction.md @@ -6,9 +6,9 @@ sidebar_label: AggregateFunction # AggregateFunction -Aggregate functions can have an implementation-defined intermediate state that can be serialized to an `AggregateFunction(…)` data type and stored in a table, usually, by means of [a materialized view](../../sql-reference/statements/create/view.md). The common way to produce an aggregate function state is by calling the aggregate function with the `-State` suffix. To get the final result of aggregation in the future, you must use the same aggregate function with the `-Merge`suffix. +Aggregate functions can have an implementation-defined intermediate state that can be serialized to an `AggregateFunction(...)` data type and stored in a table, usually, by means of [a materialized view](../../sql-reference/statements/create/view.md). The common way to produce an aggregate function state is by calling the aggregate function with the `-State` suffix. 
To get the final result of aggregation in the future, you must use the same aggregate function with the `-Merge`suffix. -`AggregateFunction(name, types_of_arguments…)` — parametric data type. +`AggregateFunction(name, types_of_arguments...)` — parametric data type. **Parameters** diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md new file mode 100644 index 00000000000..955fd54e641 --- /dev/null +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -0,0 +1,495 @@ +--- +slug: /en/sql-reference/data-types/dynamic +sidebar_position: 56 +sidebar_label: Dynamic +--- + +# Dynamic + +This type allows to store values of any type inside it without knowing all of them in advance. + +To declare a column of `Dynamic` type, use the following syntax: + +``` sql + Dynamic(max_types=N) +``` + +Where `N` is an optional parameter between `1` and `255` indicating how many different data types can be stored inside a column with type `Dynamic` across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all new types will be converted to type `String`. Default value of `max_types` is `32`. + +:::note +The Dynamic data type is an experimental feature. To use it, set `allow_experimental_dynamic_type = 1`. +::: + +## Creating Dynamic + +Using `Dynamic` type in table column definition: + +```sql +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT d, dynamicType(d) FROM test; +``` + +```text +┌─d─────────────┬─dynamicType(d)─┐ +│ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ +│ Hello, World! │ String │ +│ [1,2,3] │ Array(Int64) │ +└───────────────┴────────────────┘ +``` + +Using CAST from ordinary column: + +```sql +SELECT 'Hello, World!'::Dynamic as d, dynamicType(d); +``` + +```text +┌─d─────────────┬─dynamicType(d)─┐ +│ Hello, World! │ String │ +└───────────────┴────────────────┘ +``` + +Using CAST from `Variant` column: + +```sql +SET allow_experimental_variant_type = 1, use_variant_as_common_type = 1; +SELECT multiIf((number % 3) = 0, number, (number % 3) = 1, range(number + 1), NULL)::Dynamic AS d, dynamicType(d) FROM numbers(3) +``` + +```text +┌─d─────┬─dynamicType(d)─┐ +│ 0 │ UInt64 │ +│ [0,1] │ Array(UInt64) │ +│ ᴺᵁᴸᴸ │ None │ +└───────┴────────────────┘ +``` + + +## Reading Dynamic nested types as subcolumns + +`Dynamic` type supports reading a single nested type from a `Dynamic` column using the type name as a subcolumn. +So, if you have column `d Dynamic` you can read a subcolumn of any valid type `T` using syntax `d.T`, +this subcolumn will have type `Nullable(T)` if `T` can be inside `Nullable` and `T` otherwise. This subcolumn will +be the same size as original `Dynamic` column and will contain `NULL` values (or empty values if `T` cannot be inside `Nullable`) +in all rows in which original `Dynamic` column doesn't have type `T`. + +`Dynamic` subcolumns can be also read using function `dynamicElement(dynamic_column, type_name)`. + +Examples: + +```sql +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT d, dynamicType(d), d.String, d.Int64, d.`Array(Int64)`, d.Date, d.`Array(String)` FROM test; +``` + +```text +┌─d─────────────┬─dynamicType(d)─┬─d.String──────┬─d.Int64─┬─d.Array(Int64)─┬─d.Date─┬─d.Array(String)─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ Int64 │ ᴺᵁᴸᴸ │ 42 │ [] │ ᴺᵁᴸᴸ │ [] │ +│ Hello, World! 
│ String │ Hello, World! │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ ᴺᵁᴸᴸ │ [] │ +└───────────────┴────────────────┴───────────────┴─────────┴────────────────┴────────┴─────────────────┘ +``` + +```sql +SELECT toTypeName(d.String), toTypeName(d.Int64), toTypeName(d.`Array(Int64)`), toTypeName(d.Date), toTypeName(d.`Array(String)`) FROM test LIMIT 1; +``` + +```text +┌─toTypeName(d.String)─┬─toTypeName(d.Int64)─┬─toTypeName(d.Array(Int64))─┬─toTypeName(d.Date)─┬─toTypeName(d.Array(String))─┐ +│ Nullable(String) │ Nullable(Int64) │ Array(Int64) │ Nullable(Date) │ Array(String) │ +└──────────────────────┴─────────────────────┴────────────────────────────┴────────────────────┴─────────────────────────────┘ +``` + +```sql +SELECT d, dynamicType(d), dynamicElement(d, 'String'), dynamicElement(d, 'Int64'), dynamicElement(d, 'Array(Int64)'), dynamicElement(d, 'Date'), dynamicElement(d, 'Array(String)') FROM test;``` +``` + +```text +┌─d─────────────┬─dynamicType(d)─┬─dynamicElement(d, 'String')─┬─dynamicElement(d, 'Int64')─┬─dynamicElement(d, 'Array(Int64)')─┬─dynamicElement(d, 'Date')─┬─dynamicElement(d, 'Array(String)')─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ Int64 │ ᴺᵁᴸᴸ │ 42 │ [] │ ᴺᵁᴸᴸ │ [] │ +│ Hello, World! │ String │ Hello, World! │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ ᴺᵁᴸᴸ │ [] │ +└───────────────┴────────────────┴─────────────────────────────┴────────────────────────────┴───────────────────────────────────┴───────────────────────────┴────────────────────────────────────┘ +``` + +To know what variant is stored in each row function `dynamicType(dynamic_column)` can be used. It returns `String` with value type name for each row (or `'None'` if row is `NULL`). + +Example: + +```sql +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT dynamicType(d) from test; +``` + +```text +┌─dynamicType(d)─┐ +│ None │ +│ Int64 │ +│ String │ +│ Array(Int64) │ +└────────────────┘ +``` + +## Conversion between Dynamic column and other columns + +There are 4 possible conversions that can be performed with `Dynamic` column. + +### Converting an ordinary column to a Dynamic column + +```sql +SELECT 'Hello, World!'::Dynamic as d, dynamicType(d); +``` + +```text +┌─d─────────────┬─dynamicType(d)─┐ +│ Hello, World! │ String │ +└───────────────┴────────────────┘ +``` + +### Converting a String column to a Dynamic column through parsing + +To parse `Dynamic` type values from a `String` column you can enable setting `cast_string_to_dynamic_use_inference`: + +```sql +SET cast_string_to_dynamic_use_inference = 1; +SELECT CAST(materialize(map('key1', '42', 'key2', 'true', 'key3', '2020-01-01')), 'Map(String, Dynamic)') as map_of_dynamic, mapApply((k, v) -> (k, dynamicType(v)), map_of_dynamic) as map_of_dynamic_types; +``` + +```text +┌─map_of_dynamic──────────────────────────────┬─map_of_dynamic_types─────────────────────────┐ +│ {'key1':42,'key2':true,'key3':'2020-01-01'} │ {'key1':'Int64','key2':'Bool','key3':'Date'} │ +└─────────────────────────────────────────────┴──────────────────────────────────────────────┘ +``` + +### Converting a Dynamic column to an ordinary column + +It is possible to convert a `Dynamic` column to an ordinary column. 
In this case all nested types will be converted to a destination type: + +```sql +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('42.42'), (true), ('e10'); +SELECT d::Nullable(Float64) FROM test; +``` + +```text +┌─CAST(d, 'Nullable(Float64)')─┐ +│ ᴺᵁᴸᴸ │ +│ 42 │ +│ 42.42 │ +│ 1 │ +│ 0 │ +└──────────────────────────────┘ +``` + +### Converting a Variant column to Dynamic column + +```sql +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('String'), ([1, 2, 3]); +SELECT v::Dynamic as d, dynamicType(d) from test; +``` + +```text +┌─d───────┬─dynamicType(d)─┐ +│ ᴺᵁᴸᴸ │ None │ +│ 42 │ UInt64 │ +│ String │ String │ +│ [1,2,3] │ Array(UInt64) │ +└─────────┴────────────────┘ +``` + +### Converting a Dynamic(max_types=N) column to another Dynamic(max_types=K) + +If `K >= N`, then during conversion the data doesn't change: + +```sql +CREATE TABLE test (d Dynamic(max_types=3)) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true); +SELECT d::Dynamic(max_types=5) as d2, dynamicType(d2) FROM test; +``` + +```text +┌─d2────┬─dynamicType(d2)─┐ +│ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ +│ 43 │ Int64 │ +│ 42.42 │ String │ +│ true │ Bool │ +└───────┴─────────────────┘ +``` + +If `K < N`, then the values with the rarest types are converted to `String`: +```sql +CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]); +SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2) FROM test; +``` + +```text +┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ 42 │ Int64 │ +│ 43 │ Int64 │ 43 │ Int64 │ +│ 42.42 │ String │ 42.42 │ String │ +│ true │ Bool │ true │ String │ +│ [1,2,3] │ Array(Int64) │ [1,2,3] │ String │ +└─────────┴────────────────┴─────────┴─────────────────┘ +``` + +If `K=1`, all types are converted to `String`: + +```sql +CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]); +SELECT d, dynamicType(d), d::Dynamic(max_types=1) as d2, dynamicType(d2) FROM test; +``` + +```text +┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ 42 │ String │ +│ 43 │ Int64 │ 43 │ String │ +│ 42.42 │ String │ 42.42 │ String │ +│ true │ Bool │ true │ String │ +│ [1,2,3] │ Array(Int64) │ [1,2,3] │ String │ +└─────────┴────────────────┴─────────┴─────────────────┘ +``` + +## Reading Dynamic type from the data + +All text formats (TSV, CSV, CustomSeparated, Values, JSONEachRow, etc.) support reading the `Dynamic` type. During data parsing, ClickHouse tries to infer the type of each value and uses it when inserting into the `Dynamic` column. + +Example: + +```sql +SELECT + d, + dynamicType(d), + dynamicElement(d, 'String') AS str, + dynamicElement(d, 'Int64') AS num, + dynamicElement(d, 'Float64') AS float, + dynamicElement(d, 'Date') AS date, + dynamicElement(d, 'Array(Int64)') AS arr +FROM format(JSONEachRow, 'd Dynamic', $$ +{"d" : "Hello, World!"}, +{"d" : 42}, +{"d" : 42.42}, +{"d" : "2020-01-01"}, +{"d" : [1, 2, 3]} +$$) +``` + +```text +┌─d─────────────┬─dynamicType(d)─┬─str───────────┬──num─┬─float─┬───────date─┬─arr─────┐ +│ Hello, World! │ String │ Hello, World! │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ Int64 │ ᴺᵁᴸᴸ │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42.42 │ Float64 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 42.42 │ ᴺᵁᴸᴸ │ [] │ +│ 2020-01-01 │ Date │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2020-01-01 │ [] │ +│ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ +└───────────────┴────────────────┴───────────────┴──────┴───────┴────────────┴─────────┘ +``` + +## Comparing values of Dynamic type + +Values of `Dynamic` types are compared similarly to values of `Variant` type: The result of operator `<` for values `d1` with underlying type `T1` and `d2` with underlying type `T2` of a type `Dynamic` is defined as follows: +- If `T1 = T2 = T`, the result will be `d1.T < d2.T` (underlying values will be compared). +- If `T1 != T2`, the result will be `T1 < T2` (type names will be compared). + +Examples: +```sql +CREATE TABLE test (d1 Dynamic, d2 Dynamic) ENGINE=Memory; +INSERT INTO test VALUES (42, 42), (42, 43), (42, 'abc'), (42, [1, 2, 3]), (42, []), (42, NULL); +``` + +```sql +SELECT d2, dynamicType(d2) as d2_type from test order by d2; +``` + +```text +┌─d2──────┬─d2_type──────┐ +│ [] │ Array(Int64) │ +│ [1,2,3] │ Array(Int64) │ +│ 42 │ Int64 │ +│ 43 │ Int64 │ +│ abc │ String │ +│ ᴺᵁᴸᴸ │ None │ +└─────────┴──────────────┘ +``` + +```sql +SELECT d1, dynamicType(d1) as d1_type, d2, dynamicType(d2) as d2_type, d1 = d2, d1 < d2, d1 > d2 from test; +``` + +```text +┌─d1─┬─d1_type─┬─d2──────┬─d2_type──────┬─equals(d1, d2)─┬─less(d1, d2)─┬─greater(d1, d2)─┐ +│ 42 │ Int64 │ 42 │ Int64 │ 1 │ 0 │ 0 │ +│ 42 │ Int64 │ 43 │ Int64 │ 0 │ 1 │ 0 │ +│ 42 │ Int64 │ abc │ String │ 0 │ 1 │ 0 │ +│ 42 │ Int64 │ [1,2,3] │ Array(Int64) │ 0 │ 0 │ 1 │ +│ 42 │ Int64 │ [] │ Array(Int64) │ 0 │ 0 │ 1 │ +│ 42 │ Int64 │ ᴺᵁᴸᴸ │ None │ 0 │ 1 │ 0 │ +└────┴─────────┴─────────┴──────────────┴────────────────┴──────────────┴─────────────────┘ +``` + +If you need to find the row with a specific `Dynamic` value, you can do one of the following: + +- Cast value to the `Dynamic` type: + +```sql +SELECT * FROM test WHERE d2 == [1,2,3]::Array(UInt32)::Dynamic; +``` + +```text +┌─d1─┬─d2──────┐ +│ 42 │ [1,2,3] │ +└────┴─────────┘ +``` + +- Compare `Dynamic` subcolumn with required type: + +```sql +SELECT * FROM test WHERE d2.`Array(Int64)` == [1,2,3] -- or using dynamicElement(d2, 'Array(Int64)') +``` + +```text +┌─d1─┬─d2──────┐ +│ 42 │ [1,2,3] │ +└────┴─────────┘ +``` + +Sometimes it can be useful to make an additional check on the dynamic type, because subcolumns with complex types like `Array/Map/Tuple` cannot be inside `Nullable` and will have default values instead of `NULL` on rows with a different type: + +```sql +SELECT d2, d2.`Array(Int64)`, dynamicType(d2) FROM test WHERE d2.`Array(Int64)` == []; +``` + +```text +┌─d2───┬─d2.Array(Int64)─┬─dynamicType(d2)─┐ +│ 42 │ [] │ Int64 │ +│ 43 │ [] │ Int64 │ +│ abc │ [] │ String │ +│ [] │ [] │ Array(Int64) │ +│ ᴺᵁᴸᴸ │ [] │ None │ +└──────┴─────────────────┴─────────────────┘ +``` + +```sql +SELECT d2, d2.`Array(Int64)`, dynamicType(d2) FROM test WHERE dynamicType(d2) == 'Array(Int64)' AND d2.`Array(Int64)` == []; +``` + +```text +┌─d2─┬─d2.Array(Int64)─┬─dynamicType(d2)─┐ +│ [] │ [] │ Array(Int64) │ +└────┴─────────────────┴─────────────────┘ +``` + +**Note:** values of dynamic types with different numeric types are considered as different values and are not compared to each other; their type names are compared instead.
+ +Example: + +```sql +CREATE TABLE test (d Dynamic) ENGINE=Memory; +INSERT INTO test VALUES (1::UInt32), (1::Int64), (100::UInt32), (100::Int64); +SELECT d, dynamicType(d) FROM test ORDER by d; +``` + +```text +┌─v───┬─dynamicType(v)─┐ +│ 1 │ Int64 │ +│ 100 │ Int64 │ +│ 1 │ UInt32 │ +│ 100 │ UInt32 │ +└─────┴────────────────┘ +``` + +## Reaching the limit in number of different data types stored inside Dynamic + +`Dynamic` data type can store only limited number of different data types inside. By default, this limit is 32, but you can change it in type declaration using syntax `Dynamic(max_types=N)` where N is between 1 and 255 (due to implementation details, it's impossible to have more than 255 different data types inside Dynamic). +When the limit is reached, all new data types inserted to `Dynamic` column will be casted to `String` and stored as `String` values. + +Let's see what happens when the limit is reached in different scenarios. + +### Reaching the limit during data parsing + +During parsing of `Dynamic` values from the data, when the limit is reached for current block of data, all new values will be inserted as `String` values: + +```sql +SELECT d, dynamicType(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', ' +{"d" : 42} +{"d" : [1, 2, 3]} +{"d" : "Hello, World!"} +{"d" : "2020-01-01"} +{"d" : ["str1", "str2", "str3"]} +{"d" : {"a" : 1, "b" : [1, 2, 3]}} +') +``` + +```text +┌─d──────────────────────────┬─dynamicType(d)─┐ +│ 42 │ Int64 │ +│ [1,2,3] │ Array(Int64) │ +│ Hello, World! │ String │ +│ 2020-01-01 │ String │ +│ ["str1", "str2", "str3"] │ String │ +│ {"a" : 1, "b" : [1, 2, 3]} │ String │ +└────────────────────────────┴────────────────┘ +``` + +As we can see, after inserting 3 different data types `Int64`, `Array(Int64)` and `String` all new types were converted to `String`. + +### During merges of data parts in MergeTree table engines + +During merge of several data parts in MergeTree table the `Dynamic` column in the resulting data part can reach the limit of different data types inside and won't be able to store all types from source parts. +In this case ClickHouse chooses what types will remain after merge and what types will be casted to `String`. In most cases ClickHouse tries to keep the most frequent types and cast the rarest types to `String`, but it depends on the implementation. + +Let's see an example of such merge. 
First, let's create a table with `Dynamic` column, set the limit of different data types to `3` and insert values with `5` different types: + +```sql +CREATE TABLE test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree ORDER BY id; +SYSTEM STOP MERGES test; +INSERT INTO test SELECT number, number FROM numbers(5); +INSERT INTO test SELECT number, range(number) FROM numbers(4); +INSERT INTO test SELECT number, toDate(number) FROM numbers(3); +INSERT INTO test SELECT number, map(number, number) FROM numbers(2); +INSERT INTO test SELECT number, 'str_' || toString(number) FROM numbers(1); +``` + +Each insert will create a separate data part with a `Dynamic` column containing a single type: +```sql +SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part; +``` + +```text +┌─count()─┬─dynamicType(d)──────┬─_part─────┐ +│ 5 │ UInt64 │ all_1_1_0 │ +│ 4 │ Array(UInt64) │ all_2_2_0 │ +│ 3 │ Date │ all_3_3_0 │ +│ 2 │ Map(UInt64, UInt64) │ all_4_4_0 │ +│ 1 │ String │ all_5_5_0 │ +└─────────┴─────────────────────┴───────────┘ +``` + +Now, let's merge all parts into one and see what will happen: + +```sql +SYSTEM START MERGES test; +OPTIMIZE TABLE test FINAL; +SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part; +``` + +```text +┌─count()─┬─dynamicType(d)─┬─_part─────┐ +│ 5 │ UInt64 │ all_1_5_2 │ +│ 6 │ String │ all_1_5_2 │ +│ 4 │ Array(UInt64) │ all_1_5_2 │ +└─────────┴────────────────┴───────────┘ +``` + +As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and cast all other types to `String`. diff --git a/docs/en/sql-reference/data-types/fixedstring.md b/docs/en/sql-reference/data-types/fixedstring.md index 0316df7fe34..0c021b28f74 100644 --- a/docs/en/sql-reference/data-types/fixedstring.md +++ b/docs/en/sql-reference/data-types/fixedstring.md @@ -21,8 +21,8 @@ The `FixedString` type is efficient when data has the length of precisely `N` by Examples of the values that can be efficiently stored in `FixedString`-typed columns: - The binary representation of IP addresses (`FixedString(16)` for IPv6). -- Language codes (ru_RU, en_US … ). -- Currency codes (USD, RUB … ). +- Language codes (ru_RU, en_US ... ). +- Currency codes (USD, RUB ... ). - Binary representation of hashes (`FixedString(16)` for MD5, `FixedString(32)` for SHA256). To store UUID values, use the [UUID](../../sql-reference/data-types/uuid.md) data type. diff --git a/docs/en/sql-reference/data-types/nested-data-structures/index.md b/docs/en/sql-reference/data-types/nested-data-structures/index.md index d118170cd39..579ee9bfa8b 100644 --- a/docs/en/sql-reference/data-types/nested-data-structures/index.md +++ b/docs/en/sql-reference/data-types/nested-data-structures/index.md @@ -6,7 +6,7 @@ sidebar_label: Nested(Name1 Type1, Name2 Type2, ...) # Nested -## Nested(name1 Type1, Name2 Type2, …) +## Nested(name1 Type1, Name2 Type2, ...) A nested data structure is like a table inside a cell. The parameters of a nested data structure – the column names and types – are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure.
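As a rough illustration of the paragraph above, a hypothetical table with a `Nested` column could be declared and queried like the sketch below. The `visits` table and the `Goals` column are invented for this example; when selected directly (outside of `ARRAY JOIN`), nested columns come back as parallel arrays of equal length:

``` sql
-- hypothetical table: every row of `visits` may carry any number of Goals entries
CREATE TABLE visits
(
    CounterID UInt32,
    StartDate Date,
    Goals Nested
    (
        ID UInt32,
        EventTime DateTime
    )
) ENGINE = MergeTree
ORDER BY (CounterID, StartDate);

-- nested columns are addressed with dot notation and read as arrays
SELECT Goals.ID, Goals.EventTime
FROM visits
WHERE CounterID = 101500
LIMIT 5;
```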
diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 39f8409c1e1..4fb74ac30e4 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -5,7 +5,7 @@ sidebar_label: SimpleAggregateFunction --- # SimpleAggregateFunction -`SimpleAggregateFunction(name, types_of_arguments…)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we do not have to store and process any extra data. +`SimpleAggregateFunction(name, types_of_arguments...)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we do not have to store and process any extra data. The common way to produce an aggregate function value is by calling the aggregate function with the [-SimpleState](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-simplestate) suffix. diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index 6d95f3dc358..e3fb1d91c05 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -77,7 +77,7 @@ Alias: `a * b` (operator) ## divide -Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../../sql-reference/data-types/float.md). Integer division is provided by the `intDiv` function. +Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../data-types/float.md). Integer division is provided by the `intDiv` function. Division by 0 returns `inf`, `-inf`, or `nan`. @@ -140,11 +140,75 @@ Same as `intDiv` but returns zero when dividing by zero or when dividing a minim intDivOrZero(a, b) ``` +## isFinite + +Returns 1 if the Float32 or Float64 argument not infinite and not a NaN, otherwise this function returns 0. + +**Syntax** + +```sql +isFinite(x) +``` + +## isInfinite + +Returns 1 if the Float32 or Float64 argument is infinite, otherwise this function returns 0. Note that 0 is returned for a NaN. + +**Syntax** + +```sql +isInfinite(x) +``` + +## ifNotFinite + +Checks whether a floating point value is finite. + +**Syntax** + +```sql +ifNotFinite(x,y) +``` + +**Arguments** + +- `x` — Value to check for infinity. [Float\*](../data-types/float.md). +- `y` — Fallback value. [Float\*](../data-types/float.md). + +**Returned value** + +- `x` if `x` is finite. 
+- `y` if `x` is not finite. + +**Example** + +Query: + + SELECT 1/0 as infimum, ifNotFinite(infimum,42) + +Result: + + ┌─infimum─┬─ifNotFinite(divide(1, 0), 42)─┐ + │ inf │ 42 │ + └─────────┴───────────────────────────────┘ + +You can get a similar result by using the [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`. + +## isNaN + +Returns 1 if the Float32 or Float64 argument is NaN, otherwise this function returns 0. + +**Syntax** + +```sql +isNaN(x) +``` + ## modulo Calculates the remainder of the division of two values `a` by `b`. -The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../../sql-reference/data-types/float.md). +The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../data-types/float.md). The remainder is computed like in C++. Truncated division is used for negative numbers. @@ -248,7 +312,7 @@ lcm(a, b) ## max2 -Returns the bigger of two values `a` and `b`. The returned value is of type [Float64](../../sql-reference/data-types/float.md). +Returns the bigger of two values `a` and `b`. The returned value is of type [Float64](../data-types/float.md). **Syntax** @@ -274,7 +338,7 @@ Result: ## min2 -Returns the smaller of two values `a` and `b`. The returned value is of type [Float64](../../sql-reference/data-types/float.md). +Returns the smaller of two values `a` and `b`. The returned value is of type [Float64](../data-types/float.md). **Syntax** @@ -300,7 +364,7 @@ Result: ## multiplyDecimal -Multiplies two decimals `a` and `b`. The result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md). +Multiplies two decimals `a` and `b`. The result value will be of type [Decimal256](../data-types/decimal.md). The scale of the result can be explicitly specified by `result_scale`. If `result_scale` is not specified, it is assumed to be the maximum scale of the input values. @@ -314,15 +378,13 @@ multiplyDecimal(a, b[, result_scale]) **Arguments** -- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md). -- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md). -- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md). +- `a` — First value. [Decimal](../data-types/decimal.md). +- `b` — Second value. [Decimal](../data-types/decimal.md). +- `result_scale` — Scale of result. [Int/UInt](../data-types/int-uint.md). **Returned value** -- The result of multiplication with given scale. - -Type: [Decimal256](../../sql-reference/data-types/decimal.md). +- The result of multiplication with given scale. [Decimal256](../data-types/decimal.md). **Example** @@ -376,7 +438,7 @@ Code: 407. DB::Exception: Received from localhost:9000. DB::Exception: Decimal m ## divideDecimal -Divides two decimals `a` and `b`. The result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md). +Divides two decimals `a` and `b`. The result value will be of type [Decimal256](../data-types/decimal.md). The scale of the result can be explicitly specified by `result_scale`. If `result_scale` is not specified, it is assumed to be the maximum scale of the input values. @@ -390,15 +452,13 @@ divideDecimal(a, b[, result_scale]) **Arguments** -- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md). -- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md).
-- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md). +- `a` — First value: [Decimal](../data-types/decimal.md). +- `b` — Second value: [Decimal](../data-types/decimal.md). +- `result_scale` — Scale of result: [Int/UInt](../data-types/int-uint.md). **Returned value** -- The result of division with given scale. - -Type: [Decimal256](../../sql-reference/data-types/decimal.md). +- The result of division with given scale. [Decimal256](../data-types/decimal.md). **Example** diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 87e733a4b0c..7b52fbff714 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -19,7 +19,7 @@ empty([x]) An array is considered empty if it does not contain any elements. :::note -Can be optimized by enabling the [`optimize_functions_to_subcolumns` setting](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`. +Can be optimized by enabling the [`optimize_functions_to_subcolumns` setting](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`. ::: The function also works for [strings](string-functions.md#empty) or [UUID](uuid-functions.md#empty). @@ -30,9 +30,7 @@ The function also works for [strings](string-functions.md#empty) or [UUID](uuid- **Returned value** -- Returns `1` for an empty array or `0` for a non-empty array. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for an empty array or `0` for a non-empty array. [UInt8](../data-types/int-uint.md). **Example** @@ -63,7 +61,7 @@ notEmpty([x]) An array is considered non-empty if it contains at least one element. :::note -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT notEmpty(arr) FROM table` transforms to `SELECT arr.size0 != 0 FROM TABLE`. +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT notEmpty(arr) FROM table` transforms to `SELECT arr.size0 != 0 FROM TABLE`. ::: The function also works for [strings](string-functions.md#notempty) or [UUID](uuid-functions.md#notempty). @@ -74,9 +72,7 @@ The function also works for [strings](string-functions.md#notempty) or [UUID](uu **Returned value** -- Returns `1` for a non-empty array or `0` for an empty array. - -Type: [UInt8](../data-types/int-uint.md). 
+- Returns `1` for a non-empty array or `0` for an empty array. [UInt8](../data-types/int-uint.md). **Example** @@ -100,7 +96,7 @@ Returns the number of items in the array. The result type is UInt64. The function also works for strings. -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT length(arr) FROM table` transforms to `SELECT arr.size0 FROM TABLE`. +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT length(arr) FROM table` transforms to `SELECT arr.size0 FROM TABLE`. Alias: `OCTET_LENGTH` @@ -561,7 +557,7 @@ Result: └─────────────┴─────────────┴────────────────┴─────────────────┘ ``` -## array(x1, …), operator \[x1, …\] +## array(x1, ...), operator \[x1, ...\] Creates an array from the function arguments. The arguments must be constants and have types that have the smallest common type. At least one argument must be passed, because otherwise it isn’t clear which type of array to create. That is, you can’t use this function to create an empty array (to do that, use the ‘emptyArray\*’ function described above). @@ -581,7 +577,7 @@ arrayConcat(arrays) **Arguments** -- `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type. +- `arrays` – Arbitrary number of arguments of [Array](../data-types/array.md) type. **Example** @@ -768,9 +764,9 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) Elements set to `NULL` are handled as normal values. -## arrayCount(\[func,\] arr1, …) +## arrayCount(\[func,\] arr1, ...) -Returns the number of elements for which `func(arr1[i], …, arrN[i])` returns something other than 0. If `func` is not specified, it returns the number of non-zero elements in the array. +Returns the number of elements for which `func(arr1[i], ..., arrN[i])` returns something other than 0. If `func` is not specified, it returns the number of non-zero elements in the array. Note that the `arrayCount` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. @@ -797,9 +793,11 @@ The sizes of the two vectors must be equal. Arrays and Tuples may also contain m **Returned value** -- The dot product of the two vectors. +- The dot product of the two vectors. [Numeric](https://clickhouse.com/docs/en/native-protocol/columns#numeric-types). -Type: numeric - determined by the type of the arguments. If Arrays or Tuples contain mixed element types then the result type is the supertype. +:::note +The return type is determined by the type of the arguments. If Arrays or Tuples contain mixed element types then the result type is the supertype. +::: **Examples** @@ -847,7 +845,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL) ## arrayEnumerate(arr) -Returns the array \[1, 2, 3, …, length (arr) \] +Returns the array \[1, 2, 3, ..., length (arr) \] This function is normally used with ARRAY JOIN. It allows counting something just once for each array after applying ARRAY JOIN. 
Example: @@ -887,7 +885,7 @@ WHERE (CounterID = 160656) AND notEmpty(GoalsReached) This function can also be used in higher-order functions. For example, you can use it to get array indexes for elements that match a condition. -## arrayEnumerateUniq(arr, …) +## arrayEnumerateUniq(arr, ...) Returns an array the same size as the source array, indicating for each element what its position is among elements with the same value. For example: arrayEnumerateUniq(\[10, 20, 10, 30\]) = \[1, 1, 2, 1\]. @@ -1060,7 +1058,7 @@ arrayPushBack(array, single_value) **Arguments** - `array` – Array. -- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. +- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. **Example** @@ -1085,7 +1083,7 @@ arrayPushFront(array, single_value) **Arguments** - `array` – Array. -- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. +- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. **Example** @@ -1181,14 +1179,12 @@ arrayShingles(array, length) **Arguments** -- `array` — Input array [Array](../../sql-reference/data-types/array.md). +- `array` — Input array [Array](../data-types/array.md). - `length` — The length of each shingle. **Returned value** -- An array of generated shingles. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array of generated shingles. [Array](../data-types/array.md). **Examples** @@ -1206,7 +1202,7 @@ Result: └───────────────────┘ ``` -## arraySort(\[func,\] arr, …) {#sort} +## arraySort(\[func,\] arr, ...) {#sort} Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. 
If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description. @@ -1307,11 +1303,11 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.org/wiki/Schwartzian_transform) is used. ::: -## arrayPartialSort(\[func,\] limit, arr, …) +## arrayPartialSort(\[func,\] limit, arr, ...) Same as `arraySort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in ascending order. Remaining elements `(limit..N]` shall contain elements in unspecified order. -## arrayReverseSort(\[func,\] arr, …) {#reverse-sort} +## arrayReverseSort(\[func,\] arr, ...) {#reverse-sort} Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description. @@ -1412,7 +1408,7 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` -## arrayPartialReverseSort(\[func,\] limit, arr, …) +## arrayPartialReverseSort(\[func,\] limit, arr, ...) Same as `arrayReverseSort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in descending order. Remaining elements `(limit..N]` shall contain elements in unspecified order. @@ -1535,7 +1531,7 @@ Result: [3,9,1,4,5,6,7,8,2,10] ``` -## arrayUniq(arr, …) +## arrayUniq(arr, ...) If one argument is passed, it counts the number of different elements in the array. If multiple arguments are passed, it counts the number of different tuples of elements at corresponding positions in multiple arrays. @@ -1562,9 +1558,7 @@ arrayDifference(array) **Returned values** -Returns an array of differences between adjacent array elements. - -Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). +Returns an array of differences between adjacent array elements. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). **Example** @@ -1766,8 +1760,8 @@ arrayReduce(agg_func, arr1, arr2, ..., arrN) **Arguments** -- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). -- `arr` — Any number of [array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. +- `agg_func` — The name of an aggregate function which should be a constant [string](../data-types/string.md). +- `arr` — Any number of [array](../data-types/array.md) type columns as the parameters of the aggregation function. 
**Returned value** @@ -1835,15 +1829,13 @@ arrayReduceInRanges(agg_func, ranges, arr1, arr2, ..., arrN) **Arguments** -- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). -- `ranges` — The ranges to aggretate which should be an [array](../../sql-reference/data-types/array.md) of [tuples](../../sql-reference/data-types/tuple.md) which containing the index and the length of each range. -- `arr` — Any number of [Array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. +- `agg_func` — The name of an aggregate function which should be a constant [string](../data-types/string.md). +- `ranges` — The ranges to aggregate, which should be an [array](../data-types/array.md) of [tuples](../data-types/tuple.md) containing the index and the length of each range. +- `arr` — Any number of [Array](../data-types/array.md) type columns as the parameters of the aggregation function. **Returned value** -- Array containing results of the aggregate function over specified ranges. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array containing results of the aggregate function over specified ranges. [Array](../data-types/array.md). **Example** @@ -1956,7 +1948,7 @@ Alias: `flatten`. **Parameters** -- `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. +- `array_of_arrays` — [Array](../data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. **Examples** @@ -1982,13 +1974,11 @@ arrayCompact(arr) **Arguments** -`arr` — The [array](../../sql-reference/data-types/array.md) to inspect. +`arr` — The [array](../data-types/array.md) to inspect. **Returned value** -The array without duplicate. - -Type: `Array`. +The array without duplicates. [Array](../data-types/array.md). **Example** @@ -2018,15 +2008,13 @@ arrayZip(arr1, arr2, ..., arrN) **Arguments** -- `arrN` — [Array](../../sql-reference/data-types/array.md). +- `arrN` — [Array](../data-types/array.md). The function can take any number of arrays of different types. All the input arrays must be of equal size. **Returned value** -- Array with elements from the source arrays grouped into [tuples](../../sql-reference/data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array with elements from the source arrays grouped into [tuples](../data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. [Array](../data-types/array.md). **Example** @@ -2079,9 +2067,9 @@ Result: └───────────────────────────────────────────────┘ ``` -## arrayMap(func, arr1, …) +## arrayMap(func, arr1, ...) -Returns an array obtained from the original arrays by application of `func(arr1[i], …, arrN[i])` for each element. Arrays `arr1` … `arrN` must have the same number of elements. +Returns an array obtained from the original arrays by application of `func(arr1[i], ..., arrN[i])` for each element. Arrays `arr1` ... `arrN` must have the same number of elements. Examples: @@ -2109,9 +2097,9 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res Note that the `arrayMap` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted.
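Because `arrayMap` is often combined with other higher-order array functions such as `arrayFilter` (documented just below), a small sketch (not part of the patch; values are illustrative) may help show how the lambdas compose:

```sql
-- Keep only the odd numbers, then square them; the expected result is [1, 9, 25].
SELECT arrayMap(x -> x * x, arrayFilter(x -> x % 2 = 1, [1, 2, 3, 4, 5])) AS squares_of_odds;
```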
-## arrayFilter(func, arr1, …) +## arrayFilter(func, arr1, ...) -Returns an array containing only the elements in `arr1` for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns an array containing only the elements in `arr1` for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Examples: @@ -2142,9 +2130,9 @@ SELECT Note that the `arrayFilter` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayFill(func, arr1, …) +## arrayFill(func, arr1, ...) -Scan through `arr1` from the first element to the last element and replace `arr1[i]` by `arr1[i - 1]` if `func(arr1[i], …, arrN[i])` returns 0. The first element of `arr1` will not be replaced. +Scan through `arr1` from the first element to the last element and replace `arr1[i]` by `arr1[i - 1]` if `func(arr1[i], ..., arrN[i])` returns 0. The first element of `arr1` will not be replaced. Examples: @@ -2160,9 +2148,9 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, Note that the `arrayFill` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayReverseFill(func, arr1, …) +## arrayReverseFill(func, arr1, ...) -Scan through `arr1` from the last element to the first element and replace `arr1[i]` by `arr1[i + 1]` if `func(arr1[i], …, arrN[i])` returns 0. The last element of `arr1` will not be replaced. +Scan through `arr1` from the last element to the first element and replace `arr1[i]` by `arr1[i + 1]` if `func(arr1[i], ..., arrN[i])` returns 0. The last element of `arr1` will not be replaced. Examples: @@ -2178,9 +2166,9 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, Note that the `arrayReverseFill` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arraySplit(func, arr1, …) +## arraySplit(func, arr1, ...) -Split `arr1` into multiple arrays. When `func(arr1[i], …, arrN[i])` returns something other than 0, the array will be split on the left hand side of the element. The array will not be split before the first element. +Split `arr1` into multiple arrays. When `func(arr1[i], ..., arrN[i])` returns something other than 0, the array will be split on the left hand side of the element. The array will not be split before the first element. Examples: @@ -2196,9 +2184,9 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Note that the `arraySplit` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayReverseSplit(func, arr1, …) +## arrayReverseSplit(func, arr1, ...) -Split `arr1` into multiple arrays. When `func(arr1[i], …, arrN[i])` returns something other than 0, the array will be split on the right hand side of the element. The array will not be split after the last element. +Split `arr1` into multiple arrays. When `func(arr1[i], ..., arrN[i])` returns something other than 0, the array will be split on the right hand side of the element. The array will not be split after the last element. 
Examples: @@ -2214,30 +2202,30 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Note that the `arrayReverseSplit` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayExists(\[func,\] arr1, …) +## arrayExists(\[func,\] arr1, ...) -Returns 1 if there is at least one element in `arr` for which `func(arr1[i], …, arrN[i])` returns something other than 0. Otherwise, it returns 0. +Returns 1 if there is at least one element in `arr` for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Otherwise, it returns 0. Note that the `arrayExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -## arrayAll(\[func,\] arr1, …) +## arrayAll(\[func,\] arr1, ...) -Returns 1 if `func(arr1[i], …, arrN[i])` returns something other than 0 for all the elements in arrays. Otherwise, it returns 0. +Returns 1 if `func(arr1[i], ..., arrN[i])` returns something other than 0 for all the elements in arrays. Otherwise, it returns 0. Note that the `arrayAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -## arrayFirst(func, arr1, …) +## arrayFirst(func, arr1, ...) -Returns the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the first element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. ## arrayFirstOrNull -Returns the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0, otherwise it returns `NULL`. +Returns the first element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0, otherwise it returns `NULL`. **Syntax** ```sql -arrayFirstOrNull(func, arr1, …) +arrayFirstOrNull(func, arr1, ...) ``` **Parameters** @@ -2292,20 +2280,20 @@ Result: \N ``` -## arrayLast(func, arr1, …) +## arrayLast(func, arr1, ...) -Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the last element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Note that the `arrayLast` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. ## arrayLastOrNull -Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0, otherwise returns `NULL`. +Returns the last element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0, otherwise returns `NULL`. **Syntax** ```sql -arrayLastOrNull(func, arr1, …) +arrayLastOrNull(func, arr1, ...) ``` **Parameters** @@ -2348,15 +2336,15 @@ Result: \N ``` -## arrayFirstIndex(func, arr1, …) +## arrayFirstIndex(func, arr1, ...) -Returns the index of the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the index of the first element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). 
You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayLastIndex(func, arr1, …) +## arrayLastIndex(func, arr1, ...) -Returns the index of the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the index of the last element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Note that the `arrayLastIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. @@ -2376,14 +2364,16 @@ arrayMin([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** - The minimum of function values (or the array minimum). -Type: if `func` is specified, matches `func` return value type, else matches the array elements type. +:::note +If `func` is specified, then the return type matches the return value type of `func`, otherwise it matches the type of the array elements. +::: **Examples** @@ -2431,14 +2421,16 @@ arrayMax([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** - The maximum of function values (or the array maximum). -Type: if `func` is specified, matches `func` return value type, else matches the array elements type. +:::note +if `func` is specified then the return type matches the return value type of `func`, otherwise it matches the type of the array elements. +::: **Examples** @@ -2486,14 +2478,21 @@ arraySum([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** - The sum of the function values (or the array sum). -Type: for decimal numbers in source array (or for converted values, if `func` is specified) — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md), and for numeric signed — [Int64](../../sql-reference/data-types/int-uint.md). +:::note +Return type: + +- For decimal numbers in the source array (or for converted values, if `func` is specified) — [Decimal128](../data-types/decimal.md). +- For floating point numbers — [Float64](../data-types/float.md). +- For numeric unsigned — [UInt64](../data-types/int-uint.md). +- For numeric signed — [Int64](../data-types/int-uint.md). +::: **Examples** @@ -2541,14 +2540,12 @@ arrayAvg([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). 
+- `arr` — Array. [Array](../data-types/array.md). **Returned value** -- The average of function values (or the array average). - -Type: [Float64](../../sql-reference/data-types/float.md). +- The average of function values (or the array average). [Float64](../data-types/float.md). **Examples** @@ -2580,9 +2577,9 @@ Result: └─────┘ ``` -## arrayCumSum(\[func,\] arr1, …) +## arrayCumSum(\[func,\] arr1, ...) -Returns an array of the partial (running) sums of the elements in the source array `arr1`. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], …, arrN[i])`. +Returns an array of the partial (running) sums of the elements in the source array `arr1`. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], ..., arrN[i])`. **Syntax** @@ -2592,13 +2589,11 @@ arrayCumSum(arr) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. +- `arr` — [Array](../data-types/array.md) of numeric values. **Returned value** -- Returns an array of the partial sums of the elements in the source array. - -Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). +- Returns an array of the partial sums of the elements in the source array. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). Example: @@ -2614,9 +2609,9 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res Note that the `arrayCumSum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -## arrayCumSumNonNegative(\[func,\] arr1, …) +## arrayCumSumNonNegative(\[func,\] arr1, ...) -Same as `arrayCumSum`, returns an array of the partial (running) sums of the elements in the source array. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], …, arrN[i])`. Unlike `arrayCumSum`, if the current running sum is smaller than `0`, it is replaced by `0`. +Same as `arrayCumSum`, returns an array of the partial (running) sums of the elements in the source array. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], ..., arrN[i])`. Unlike `arrayCumSum`, if the current running sum is smaller than `0`, it is replaced by `0`. **Syntax** @@ -2626,13 +2621,11 @@ arrayCumSumNonNegative(arr) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. +- `arr` — [Array](../data-types/array.md) of numeric values. **Returned value** -- Returns an array of non-negative partial sums of elements in the source array. - -Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). +- Returns an array of non-negative partial sums of elements in the source array. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). 
``` sql SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res @@ -2648,7 +2641,7 @@ Note that the `arraySumNonNegative` is a [higher-order function](../../sql-refer ## arrayProduct -Multiplies elements of an [array](../../sql-reference/data-types/array.md). +Multiplies elements of an [array](../data-types/array.md). **Syntax** @@ -2658,13 +2651,11 @@ arrayProduct(arr) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. +- `arr` — [Array](../data-types/array.md) of numeric values. **Returned value** -- A product of array's elements. - -Type: [Float64](../../sql-reference/data-types/float.md). +- A product of array's elements. [Float64](../data-types/float.md). **Examples** @@ -2688,7 +2679,7 @@ Query: SELECT arrayProduct([toDecimal64(1,8), toDecimal64(2,8), toDecimal64(3,8)]) as res, toTypeName(res); ``` -Return value type is always [Float64](../../sql-reference/data-types/float.md). Result: +Return value type is always [Float64](../data-types/float.md). Result: ``` text ┌─res─┬─toTypeName(arrayProduct(array(toDecimal64(1, 8), toDecimal64(2, 8), toDecimal64(3, 8))))─┐ @@ -2698,7 +2689,7 @@ Return value type is always [Float64](../../sql-reference/data-types/float.md). ## arrayRotateLeft -Rotates an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements. +Rotates an [array](../data-types/array.md) to the left by the specified number of elements. If the number of elements is negative, the array is rotated to the right. **Syntax** @@ -2709,14 +2700,12 @@ arrayRotateLeft(arr, n) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to rotate. **Returned value** -- An array rotated to the left by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array rotated to the left by the specified number of elements. [Array](../data-types/array.md). **Examples** @@ -2764,7 +2753,7 @@ Result: ## arrayRotateRight -Rotates an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements. +Rotates an [array](../data-types/array.md) to the right by the specified number of elements. If the number of elements is negative, the array is rotated to the left. **Syntax** @@ -2775,14 +2764,12 @@ arrayRotateRight(arr, n) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to rotate. **Returned value** -- An array rotated to the right by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array rotated to the right by the specified number of elements. [Array](../data-types/array.md). **Examples** @@ -2830,7 +2817,7 @@ Result: ## arrayShiftLeft -Shifts an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements. +Shifts an [array](../data-types/array.md) to the left by the specified number of elements. New elements are filled with the provided argument or the default value of the array element type. If the number of elements is negative, the array is shifted to the right. @@ -2842,15 +2829,13 @@ arrayShiftLeft(arr, n[, default]) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to shift. - `default` — Optional. Default value for new elements. 
**Returned value** -- An array shifted to the left by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array shifted to the left by the specified number of elements. [Array](../data-types/array.md). **Examples** @@ -2926,7 +2911,7 @@ Result: ## arrayShiftRight -Shifts an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements. +Shifts an [array](../data-types/array.md) to the right by the specified number of elements. New elements are filled with the provided argument or the default value of the array element type. If the number of elements is negative, the array is shifted to the left. @@ -2938,15 +2923,13 @@ arrayShiftRight(arr, n[, default]) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to shift. - `default` — Optional. Default value for new elements. **Returned value** -- An array shifted to the right by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array shifted to the right by the specified number of elements. [Array](../data-types/array.md). **Examples** @@ -3038,9 +3021,7 @@ arrayRandomSample(arr, samples) **Returned Value** -- An array containing a random sample of elements from the input array. - -Type: [Array](../data-types/array.md). +- An array containing a random sample of elements from the input array. [Array](../data-types/array.md). **Examples** diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 0951c783aae..a48893b93bf 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -34,8 +34,8 @@ bitShiftLeft(a, b) **Arguments** -- `a` — A value to shift. [Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed. +- `a` — A value to shift. [Integer types](../data-types/int-uint.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `b` — The number of shift positions. [Unsigned integer types](../data-types/int-uint.md), 64 bit types or less are allowed. **Returned value** @@ -81,8 +81,8 @@ bitShiftRight(a, b) **Arguments** -- `a` — A value to shift. [Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed. +- `a` — A value to shift. [Integer types](../data-types/int-uint.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `b` — The number of shift positions. [Unsigned integer types](../data-types/int-uint.md), 64 bit types or less are allowed. **Returned value** @@ -131,13 +131,13 @@ bitSlice(s, offset[, length]) **Arguments** -- `s` — s is [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s` — s is [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). 
- `offset` — The start index with bit, A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the bits begins with 1. - `length` — The length of substring with bit. If you specify a negative value, the function returns an open substring \[offset, array_length - length\]. If you omit the value, the function returns the substring \[offset, the_end_string\]. If length exceeds s, it will be truncate.If length isn't multiple of 8, will fill 0 on the right. **Returned value** -- The substring. [String](../../sql-reference/data-types/string.md) +- The substring. [String](../data-types/string.md) **Example** @@ -186,11 +186,9 @@ SELECT bitTest(number, index) - `number` – Integer number. - `index` – Position of bit. -**Returned values** +**Returned value** -Returns a value of bit at specified position. - -Type: `UInt8`. +- Value of the bit at the specified position. [UInt8](../data-types/int-uint.md). **Example** @@ -251,11 +249,9 @@ SELECT bitTestAll(number, index1, index2, index3, index4, ...) - `number` – Integer number. - `index1`, `index2`, `index3`, `index4` – Positions of bit. For example, for set of positions (`index1`, `index2`, `index3`, `index4`) is true if and only if all of its positions are true (`index1` ⋀ `index2`, ⋀ `index3` ⋀ `index4`). -**Returned values** +**Returned value** -Returns result of logical conjuction. - -Type: `UInt8`. +- Result of the logical conjunction. [UInt8](../data-types/int-uint.md). **Example** @@ -316,11 +312,9 @@ SELECT bitTestAny(number, index1, index2, index3, index4, ...) - `number` – Integer number. - `index1`, `index2`, `index3`, `index4` – Positions of bit. -**Returned values** +**Returned value** -Returns result of logical disjunction. - -Type: `UInt8`. +- Result of the logical disjunction. [UInt8](../data-types/int-uint.md). **Example** @@ -368,15 +362,15 @@ bitCount(x) **Arguments** -- `x` — [Integer](../../sql-reference/data-types/int-uint.md) or [floating-point](../../sql-reference/data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. +- `x` — [Integer](../data-types/int-uint.md) or [floating-point](../data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. **Returned value** -- Number of bits set to one in the input number. +- Number of bits set to one in the input number. [UInt8](../data-types/int-uint.md). -The function does not convert input value to a larger type ([sign extension](https://en.wikipedia.org/wiki/Sign_extension)). So, for example, `bitCount(toUInt8(-1)) = 8`. - -Type: `UInt8`. +:::note +The function does not convert the input value to a larger type ([sign extension](https://en.wikipedia.org/wiki/Sign_extension)). So, for example, `bitCount(toUInt8(-1)) = 8`. +::: **Example** @@ -408,14 +402,12 @@ bitHammingDistance(int1, int2) **Arguments** -- `int1` — First integer value. [Int64](../../sql-reference/data-types/int-uint.md). -- `int2` — Second integer value. [Int64](../../sql-reference/data-types/int-uint.md). +- `int1` — First integer value. [Int64](../data-types/int-uint.md). +- `int2` — Second integer value. [Int64](../data-types/int-uint.md). **Returned value** -- The Hamming distance. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- The Hamming distance. [UInt8](../data-types/int-uint.md).
**Examples** diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index 379be302881..a5c8a663b71 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -75,8 +75,8 @@ bitmapSubsetInRange(bitmap, range_start, range_end) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `range_start` – Start of the range (inclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `range_end` – End of the range (exclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_start` – Start of the range (inclusive). [UInt32](../data-types/int-uint.md). +- `range_end` – End of the range (exclusive). [UInt32](../data-types/int-uint.md). **Example** @@ -105,8 +105,8 @@ bitmapSubsetLimit(bitmap, range_start, cardinality_limit) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `range_start` – Start of the range (inclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `cardinality_limit` – Maximum cardinality of the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_start` – Start of the range (inclusive). [UInt32](../data-types/int-uint.md). +- `cardinality_limit` – Maximum cardinality of the subset. [UInt32](../data-types/int-uint.md). **Example** @@ -134,9 +134,9 @@ subBitmap(bitmap, offset, cardinality_limit) **Arguments** -- `bitmap` – The bitmap. Type: [Bitmap object](#bitmap_functions-bitmapbuild). -- `offset` – The position of the first element of the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `cardinality_limit` – The maximum number of elements in the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `bitmap` – The bitmap. [Bitmap object](#bitmap_functions-bitmapbuild). +- `offset` – The position of the first element of the subset. [UInt32](../data-types/int-uint.md). +- `cardinality_limit` – The maximum number of elements in the subset. [UInt32](../data-types/int-uint.md). **Example** @@ -163,14 +163,12 @@ bitmapContains(bitmap, needle) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `needle` – Searched bit value. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `needle` – Searched bit value. [UInt32](../data-types/int-uint.md). **Returned values** -- 0 — If `bitmap` does not contain `needle`. -- 1 — If `bitmap` contains `needle`. - -Type: `UInt8`. +- 0 — If `bitmap` does not contain `needle`. [UInt8](../data-types/int-uint.md). +- 1 — If `bitmap` contains `needle`. [UInt8](../data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 843f22e5a6f..6ad26f452ad 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -26,7 +26,7 @@ SELECT ## makeDate -Creates a [Date](../../sql-reference/data-types/date.md) +Creates a [Date](../data-types/date.md) - from a year, month and day argument, or - from a year and day of year argument. @@ -43,16 +43,14 @@ Alias: **Arguments** -- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `month` — Month. 
[Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `day_of_year` — Day of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `year` — Year. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `month` — Month. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `day` — Day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `day_of_year` — Day of the year. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- A date created from the arguments. - -Type: [Date](../../sql-reference/data-types/date.md). +- A date created from the arguments. [Date](../data-types/date.md). **Example** @@ -85,11 +83,11 @@ Result: ``` ## makeDate32 -Like [makeDate](#makeDate) but produces a [Date32](../../sql-reference/data-types/date32.md). +Like [makeDate](#makeDate) but produces a [Date32](../data-types/date32.md). ## makeDateTime -Creates a [DateTime](../../sql-reference/data-types/datetime.md) from a year, month, day, hour, minute and second argument. +Creates a [DateTime](../data-types/datetime.md) from a year, month, day, hour, minute and second argument. **Syntax** @@ -99,19 +97,17 @@ makeDateTime(year, month, day, hour, minute, second[, timezone]) **Arguments** -- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `hour` — Hour. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `minute` — Minute. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `second` — Second. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `year` — Year. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `month` — Month. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `day` — Day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `hour` — Hour. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `minute` — Minute. 
[Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `second` — Second. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). - `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). **Returned value** -- A date with time created from the arguments. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- A date with time created from the arguments. [DateTime](../data-types/datetime.md). **Example** @@ -129,7 +125,7 @@ Result: ## makeDateTime64 -Like [makeDateTime](#makedatetime) but produces a [DateTime64](../../sql-reference/data-types/datetime64.md). +Like [makeDateTime](#makedatetime) but produces a [DateTime64](../data-types/datetime64.md). **Syntax** @@ -139,7 +135,7 @@ makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, t ## timestamp -Converts the first argument 'expr' to type [DateTime64(6)](../../sql-reference/data-types/datetime64.md). +Converts the first argument 'expr' to type [DateTime64(6)](../data-types/datetime64.md). If a second argument 'expr_time' is provided, it adds the specified time to the converted value. **Syntax** @@ -152,8 +148,8 @@ Alias: `TIMESTAMP` **Arguments** -- `expr` - Date or date with time. Type: [String](../../sql-reference/data-types/string.md). -- `expr_time` - Optional parameter. Time to add. [String](../../sql-reference/data-types/string.md). +- `expr` - Date or date with time. [String](../data-types/string.md). +- `expr_time` - Optional parameter. Time to add. [String](../data-types/string.md). **Examples** @@ -183,7 +179,7 @@ Result: **Returned value** -- [DateTime64](../../sql-reference/data-types/datetime64.md)(6) +- [DateTime64](../data-types/datetime64.md)(6) ## timeZone @@ -200,9 +196,7 @@ Alias: `timezone`. **Returned value** -- Timezone. - -Type: [String](../../sql-reference/data-types/string.md). +- Timezone. [String](../data-types/string.md). **Example** @@ -237,9 +231,7 @@ Alias: `serverTimezone`. **Returned value** -- Timezone. - -Type: [String](../../sql-reference/data-types/string.md). +- Timezone. [String](../data-types/string.md). **Example** @@ -273,14 +265,12 @@ Alias: `toTimezone`. **Arguments** -- `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md). This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types). +- `value` — Time or date and time. [DateTime64](../data-types/datetime64.md). +- `timezone` — Timezone for the returned value. [String](../data-types/string.md). This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types). **Returned value** -- Date and time. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Date and time. [DateTime](../data-types/datetime.md). **Example** @@ -320,7 +310,7 @@ int32samoa: 1546300800 ## timeZoneOf -Returns the timezone name of [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md) data types. +Returns the timezone name of [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) data types. **Syntax** @@ -332,13 +322,11 @@ Alias: `timezoneOf`. 
**Arguments** -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -- Timezone name. - -Type: [String](../../sql-reference/data-types/string.md). +- Timezone name. [String](../data-types/string.md). **Example** @@ -369,13 +357,11 @@ Alias: `timezoneOffset`. **Arguments** -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -- Offset from UTC in seconds. - -Type: [Int32](../../sql-reference/data-types/int-uint.md). +- Offset from UTC in seconds. [Int32](../data-types/int-uint.md). **Example** @@ -410,9 +396,7 @@ Alias: `YEAR` **Returned value** -- The year of the given date/time - -Type: `UInt16` +- The year of the given date/time. [UInt16](../data-types/int-uint.md). **Example** @@ -446,9 +430,7 @@ Alias: `QUARTER` **Returned value** -- The quarter of the year (1, 2, 3 or 4) of the given date/time - -Type: `UInt8` +- The quarter of the year (1, 2, 3 or 4) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -482,9 +464,7 @@ Alias: `MONTH` **Returned value** -- The month of the year (1 - 12) of the given date/time - -Type: `UInt8` +- The month of the year (1 - 12) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -518,9 +498,7 @@ Alias: `DAYOFYEAR` **Returned value** -- The day of the year (1 - 366) of the given date/time - -Type: `UInt16` +- The day of the year (1 - 366) of the given date/time. [UInt16](../data-types/int-uint.md). **Example** @@ -554,9 +532,7 @@ Aliases: `DAYOFMONTH`, `DAY` **Returned value** -- The day of the month (1 - 31) of the given date/time - -Type: `UInt8` +- The day of the month (1 - 31) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -643,9 +619,7 @@ Alias: `HOUR` **Returned value** -- The hour of the day (0 - 23) of the given date/time - -Type: `UInt8` +- The hour of the day (0 - 23) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -679,9 +653,7 @@ Alias: `MINUTE` **Returned value** -- The minute of the hour (0 - 59) of the given date/time - -Type: `UInt8` +- The minute of the hour (0 - 59) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -715,9 +687,7 @@ Alias: `SECOND` **Returned value** -- The second in the minute (0 - 59) of the given date/time - -Type: `UInt8` +- The second in the minute (0 - 59) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -763,9 +733,7 @@ Result: **Returned value** -- The millisecond in the minute (0 - 59) of the given date/time - -Type: `UInt16` +- The millisecond in the minute (0 - 59) of the given date/time. [UInt16](../data-types/int-uint.md). ## toUnixTimestamp @@ -782,9 +750,7 @@ toUnixTimestamp(str, [timezone]) **Returned value** -- Returns the unix timestamp. - -Type: `UInt32`. +- Returns the unix timestamp. [UInt32](../data-types/int-uint.md). **Example** @@ -842,9 +808,7 @@ toStartOfYear(value) **Returned value** -- The first day of the year of the input date/time - -Type: `Date` +- The first day of the year of the input date/time. [Date](../data-types/date.md). 
**Example** @@ -876,9 +840,7 @@ toStartOfISOYear(value) **Returned value** -- The first day of the year of the input date/time - -Type: `Date` +- The first day of the year of the input date/time. [Date](../data-types/date.md). **Example** @@ -911,9 +873,7 @@ toStartOfQuarter(value) **Returned value** -- The first day of the quarter of the given date/time - -Type: `Date` +- The first day of the quarter of the given date/time. [Date](../data-types/date.md). **Example** @@ -945,9 +905,7 @@ toStartOfMonth(value) **Returned value** -- The first day of the month of the given date/time - -Type: `Date` +- The first day of the month of the given date/time. [Date](../data-types/date.md). **Example** @@ -985,9 +943,7 @@ Alias: `LAST_DAY` **Returned value** -- The last day of the month of the given date/time - -Type: `Date` +- The last day of the month of the given date/time=. [Date](../data-types/date.md). **Example** @@ -1019,9 +975,7 @@ toMonday(value) **Returned value** -- The date of the nearest Monday on or prior to the given date - -Type: `Date` +- The date of the nearest Monday on or prior to the given date. [Date](../data-types/date.md). **Example** @@ -1057,9 +1011,7 @@ toStartOfWeek(t[, mode[, timezone]]) **Returned value** -- The date of the nearest Sunday or Monday on or prior to the given date, depending on the mode - -Type: `Date` +- The date of the nearest Sunday or Monday on or prior to the given date, depending on the mode. [Date](../data-types/date.md). **Example** @@ -1102,9 +1054,7 @@ toLastDayOfWeek(t[, mode[, timezone]]) **Returned value** -- The date of the nearest Sunday or Monday on or after the given date, depending on the mode - -Type: `Date` +- The date of the nearest Sunday or Monday on or after the given date, depending on the mode. [Date](../data-types/date.md). **Example** @@ -1144,9 +1094,7 @@ toStartOfDay(value) **Returned value** -- The start of the day of the given date/time - -Type: `DateTime` +- The start of the day of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1178,9 +1126,7 @@ toStartOfHour(value) **Returned value** -- The start of the hour of the given date/time - -Type: `DateTime` +- The start of the hour of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1214,9 +1160,7 @@ toStartOfMinute(value) **Returned value** -- The start of the minute of the given date/time - -Type: `DateTime` +- The start of the minute of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1248,14 +1192,12 @@ toStartOfSecond(value, [timezone]) **Arguments** -- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). +- `value` — Date and time. [DateTime64](../data-types/datetime64.md). +- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md). **Returned value** -- Input value without sub-seconds. - -Type: [DateTime64](../../sql-reference/data-types/datetime64.md). +- Input value without sub-seconds. [DateTime64](../data-types/datetime64.md). 
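+
+For illustration, assuming a `DateTime64` value with millisecond precision, the sub-second part is dropped while precision and timezone are kept:
+
+```sql
+-- illustrative sketch: the fractional part is zeroed out
+SELECT toStartOfSecond(toDateTime64('2023-04-21 10:20:30.999', 3));
+-- 2023-04-21 10:20:30.000
+```
+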
**Examples** @@ -1309,9 +1251,7 @@ toStartOfFiveMinutes(value) **Returned value** -- The start of the five-minute interval of the given date/time - -Type: `DateTime` +- The start of the five-minute interval of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1349,9 +1289,7 @@ toStartOfTenMinutes(value) **Returned value** -- The start of the ten-minute interval of the given date/time - -Type: `DateTime` +- The start of the ten-minute interval of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1389,9 +1327,7 @@ toStartOfFifteenMinutes(value) **Returned value** -- The start of the fifteen-minute interval of the given date/time - -Type: `DateTime` +- The start of the fifteen-minute interval of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1499,7 +1435,7 @@ This function returns the week number for date or datetime. The two-argument for The following table describes how the mode argument works. -| Mode | First day of week | Range | Week 1 is the first week … | +| Mode | First day of week | Range | Week 1 is the first week ... | |------|-------------------|-------|-------------------------------| | 0 | Sunday | 0-53 | with a Sunday in this year | | 1 | Monday | 0-53 | with 4 or more days this year | @@ -1598,14 +1534,12 @@ Alias: `TO_DAYS` **Arguments** -- `date` — The date to calculate the number of days passed since year zero from. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `time_zone` — A String type const value or an expression represent the time zone. [String types](../../sql-reference/data-types/string.md) +- `date` — The date to calculate the number of days passed since year zero from. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `time_zone` — A String type const value or an expression represent the time zone. [String types](../data-types/string.md) **Returned value** -The number of days passed since date 0000-01-01. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +The number of days passed since date 0000-01-01. [UInt32](../data-types/int-uint.md). **Example** @@ -1629,7 +1563,7 @@ Result: Returns for a given number of days passed since [1 January 0000](https://en.wikipedia.org/wiki/Year_zero) the corresponding date in the [proleptic Gregorian calendar defined by ISO 8601](https://en.wikipedia.org/wiki/Gregorian_calendar#Proleptic_Gregorian_calendar). The calculation is the same as in MySQL's [`FROM_DAYS()`](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_from-days) function. -The result is undefined if it cannot be represented within the bounds of the [Date](../../sql-reference/data-types/date.md) type. +The result is undefined if it cannot be represented within the bounds of the [Date](../data-types/date.md) type. **Syntax** @@ -1645,9 +1579,7 @@ Alias: `FROM_DAYS` **Returned value** -The date corresponding to the number of days passed since year zero. - -Type: [Date](../../sql-reference/data-types/date.md). +The date corresponding to the number of days passed since year zero. [Date](../data-types/date.md). 
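+
+For illustration, it inverts `toDaysSinceYearZero` (the date below is arbitrary):
+
+```sql
+-- illustrative sketch: round trip through the day count since year zero
+SELECT fromDaysSinceYearZero(toDaysSinceYearZero(toDate('2023-09-08')));
+-- 2023-09-08
+```
+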
**Example** @@ -1669,7 +1601,7 @@ Result: ## fromDaysSinceYearZero32 -Like [fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](../../sql-reference/data-types/date32.md). +Like [fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](../data-types/date32.md). ## age @@ -1686,7 +1618,7 @@ age('unit', startdate, enddate, [timezone]) **Arguments** -- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval for result. [String](../data-types/string.md). Possible values: - `nanosecond`, `nanoseconds`, `ns` @@ -1701,17 +1633,15 @@ age('unit', startdate, enddate, [timezone]) - `quarter`, `quarters`, `qq`, `q` - `year`, `years`, `yyyy`, `yy` -- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `startdate` — The first time value to subtract (the subtrahend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). -- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `enddate` — The second time value to subtract from (the minuend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../data-types/string.md). **Returned value** -Difference between `enddate` and `startdate` expressed in `unit`. - -Type: [Int](../../sql-reference/data-types/int-uint.md). +Difference between `enddate` and `startdate` expressed in `unit`. [Int](../data-types/int-uint.md). **Example** @@ -1764,7 +1694,7 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ **Arguments** -- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval for result. [String](../data-types/string.md). Possible values: - `nanosecond`, `nanoseconds`, `ns` @@ -1779,17 +1709,15 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ - `quarter`, `quarters`, `qq`, `q` - `year`, `years`, `yyyy`, `yy` -- `startdate` — The first time value to subtract (the subtrahend). 
[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `startdate` — The first time value to subtract (the subtrahend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). -- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `enddate` — The second time value to subtract from (the minuend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../data-types/string.md). **Returned value** -Difference between `enddate` and `startdate` expressed in `unit`. - -Type: [Int](../../sql-reference/data-types/int-uint.md). +Difference between `enddate` and `startdate` expressed in `unit`. [Int](../data-types/int-uint.md). **Example** @@ -1853,14 +1781,12 @@ Alias: `dateTrunc`. `unit` argument is case-insensitive. -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). +- `value` — Date and time. [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md). **Returned value** -- Value, truncated to the specified part of date. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Value, truncated to the specified part of date. [DateTime](../data-types/datetime.md). **Example** @@ -1918,7 +1844,7 @@ Aliases: `dateAdd`, `DATE_ADD`. **Arguments** -- `unit` — The type of interval to add. Note: This is not a [String](../../sql-reference/data-types/string.md) and must therefore not be quoted. +- `unit` — The type of interval to add. Note: This is not a [String](../data-types/string.md) and must therefore not be quoted. Possible values: - `second` @@ -1930,14 +1856,12 @@ Aliases: `dateAdd`, `DATE_ADD`. 
- `quarter` - `year` -- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Value of interval to add. [Int](../data-types/int-uint.md). +- `date` — The date or date with time to which `value` is added. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -Date or date with time obtained by adding `value`, expressed in `unit`, to `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by adding `value`, expressed in `unit`, to `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -1994,7 +1918,7 @@ Aliases: `dateSub`, `DATE_SUB`. **Arguments** -- `unit` — The type of interval to subtract. Note: This is not a [String](../../sql-reference/data-types/string.md) and must therefore not be quoted. +- `unit` — The type of interval to subtract. Note: This is not a [String](../data-types/string.md) and must therefore not be quoted. Possible values: @@ -2007,14 +1931,12 @@ Aliases: `dateSub`, `DATE_SUB`. - `quarter` - `year` -- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Value of interval to subtract. [Int](../data-types/int-uint.md). +- `date` — The date or date with time from which `value` is subtracted. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2063,9 +1985,9 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. **Arguments** -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). -- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). +- `date` — Date or date with time. 
[Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `value` — Value of interval to add. [Int](../data-types/int-uint.md). +- `unit` — The type of interval to add. [String](../data-types/string.md). Possible values: - `second` @@ -2079,9 +2001,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. **Returned value** -Date or date with time with the specified `value` expressed in `unit` added to `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time with the specified `value` expressed in `unit` added to `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2113,7 +2033,7 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. **Arguments** -- `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval to subtract. [String](../data-types/string.md). Possible values: - `second` @@ -2125,14 +2045,12 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. - `quarter` - `year` -- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Value of interval to subtract. [Int](../data-types/int-uint.md). +- `date` — Date or date with time. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2162,14 +2080,12 @@ addDate(date, interval) **Arguments** -- `date` — The date or date with time to which `interval` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), or [String](../../sql-reference/data-types/string.md) -- `interval` — Interval to add. [Interval](../../sql-reference/data-types/special-data-types/interval.md). +- `date` — The date or date with time to which `interval` is added. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md), [DateTime64](../data-types/datetime64.md), or [String](../data-types/string.md) +- `interval` — Interval to add. [Interval](../data-types/special-data-types/interval.md). **Returned value** -Date or date with time obtained by adding `interval` to `date`. 
- -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by adding `interval` to `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2205,14 +2121,12 @@ subDate(date, interval) **Arguments** -- `date` — The date or date with time from which `interval` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), or [String](../../sql-reference/data-types/string.md) -- `interval` — Interval to subtract. [Interval](../../sql-reference/data-types/special-data-types/interval.md). +- `date` — The date or date with time from which `interval` is subtracted. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md), [DateTime64](../data-types/datetime64.md), or [String](../data-types/string.md) +- `interval` — Interval to subtract. [Interval](../data-types/special-data-types/interval.md). **Returned value** -Date or date with time obtained by subtracting `interval` from `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `interval` from `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2248,13 +2162,11 @@ now([timezone]) **Arguments** -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Current date and time. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Current date and time. [DateTime](../data-types/datetime.md). **Example** @@ -2299,13 +2211,11 @@ now64([scale], [timezone]) **Arguments** - `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. Typically, are used - 3 (default) (milliseconds), 6 (microseconds), 9 (nanoseconds). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Current date and time with sub-second precision. - -Type: [DateTime64](../../sql-reference/data-types/datetime64.md). +- Current date and time with sub-second precision. [DateTime64](../data-types/datetime64.md). 
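+
+For illustration, the scale and the optional `timezone` argument can be combined; the value returned by such a query depends on the current time:
+
+```sql
+-- illustrative sketch: microsecond precision in an explicit time zone
+SELECT now64(6, 'UTC');
+```
+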
**Example** @@ -2335,13 +2245,11 @@ nowInBlock([timezone]) **Arguments** -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Current date and time at the moment of processing of each block of data. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Current date and time at the moment of processing of each block of data. [DateTime](../data-types/datetime.md). **Example** @@ -2381,9 +2289,7 @@ today() **Returned value** -- Current date - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Current date. [DateTime](../data-types/datetime.md). **Example** @@ -2473,7 +2379,7 @@ Result: ## YYYYMMDDToDate -Converts a number containing the year, month and day number to a [Date](../../sql-reference/data-types/date.md). +Converts a number containing the year, month and day number to a [Date](../data-types/date.md). This function is the opposite of function `toYYYYMMDD()`. @@ -2487,13 +2393,11 @@ YYYYMMDDToDate(yyyymmdd); **Arguments** -- `yyyymmdd` - A number representing the year, month and day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `yyyymmdd` - A number representing the year, month and day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- a date created from the arguments. - -Type: [Date](../../sql-reference/data-types/date.md). +- a date created from the arguments. [Date](../data-types/date.md). **Example** @@ -2511,11 +2415,11 @@ Result: ## YYYYMMDDToDate32 -Like function `YYYYMMDDToDate()` but produces a [Date32](../../sql-reference/data-types/date32.md). +Like function `YYYYMMDDToDate()` but produces a [Date32](../data-types/date32.md). ## YYYYMMDDhhmmssToDateTime -Converts a number containing the year, month, day, hours, minute and second number to a [DateTime](../../sql-reference/data-types/datetime.md). +Converts a number containing the year, month, day, hours, minute and second number to a [DateTime](../data-types/datetime.md). The output is undefined if the input does not encode a valid DateTime value. @@ -2529,14 +2433,12 @@ YYYYMMDDhhmmssToDateTime(yyyymmddhhmmss[, timezone]); **Arguments** -- `yyyymmddhhmmss` - A number representing the year, month and day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `yyyymmddhhmmss` - A number representing the year, month and day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). - `timezone` - [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). **Returned value** -- a date with time created from the arguments. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- a date with time created from the arguments. [DateTime](../data-types/datetime.md). 
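+
+For illustration, a valid date with time survives a round trip through its numeric `YYYYMMDDhhmmss` form (the value below is arbitrary):
+
+```sql
+-- illustrative sketch: convert to the numeric form and back
+SELECT YYYYMMDDhhmmssToDateTime(toYYYYMMDDhhmmss(toDateTime('2023-04-21 10:20:30')));
+-- 2023-04-21 10:20:30
+```
+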
**Example** @@ -2554,7 +2456,7 @@ Result: ## YYYYMMDDhhmmssToDateTime64 -Like function `YYYYMMDDhhmmssToDate()` but produces a [DateTime64](../../sql-reference/data-types/datetime64.md). +Like function `YYYYMMDDhhmmssToDate()` but produces a [DateTime64](../data-types/datetime64.md). Accepts an additional, optional `precision` parameter after the `timezone` parameter. @@ -3551,7 +3453,7 @@ Formats a Time according to the given Format string. Format is a constant expres formatDateTime uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format. -The opposite operation of this function is [parseDateTime](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTime). +The opposite operation of this function is [parseDateTime](../functions/type-conversion-functions.md#type_conversion_functions-parseDateTime). Alias: `DATE_FORMAT`. @@ -3677,7 +3579,7 @@ LIMIT 10 Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. -The opposite operation of this function is [parseDateTimeInJodaSyntax](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax). +The opposite operation of this function is [parseDateTimeInJodaSyntax](../functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax). **Replacement fields** @@ -3737,15 +3639,13 @@ dateName(date_part, date) **Arguments** -- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md). -- `date` — Date. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — Timezone. Optional. [String](../../sql-reference/data-types/string.md). +- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../data-types/string.md). +- `date` — Date. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `timezone` — Timezone. Optional. [String](../data-types/string.md). **Returned value** -- The specified part of date. - -Type: [String](../../sql-reference/data-types/string.md#string) +- The specified part of date. [String](../data-types/string.md#string) **Example** @@ -3777,13 +3677,11 @@ monthName(date) **Arguments** -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `date` — Date or date with time. [Date](../data-types/date.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -- The name of the month. - -Type: [String](../../sql-reference/data-types/string.md#string) +- The name of the month. [String](../data-types/string.md#string) **Example** @@ -3806,7 +3704,7 @@ This function converts a Unix timestamp to a calendar date and a time of a day. 
It can be called in two ways: -When given a single argument of type [Integer](../../sql-reference/data-types/int-uint.md), it returns a value of type [DateTime](../../sql-reference/data-types/datetime.md), i.e. behaves like [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime). +When given a single argument of type [Integer](../data-types/int-uint.md), it returns a value of type [DateTime](../data-types/datetime.md), i.e. behaves like [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime). Alias: `FROM_UNIXTIME`. @@ -3824,7 +3722,7 @@ Result: └──────────────────────────────┘ ``` -When given two or three arguments where the first argument is a value of type [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second argument is a constant format string and the third argument is an optional constant time zone string, the function returns a value of type [String](../../sql-reference/data-types/string.md#string), i.e. it behaves like [formatDateTime](#formatdatetime). In this case, [MySQL's datetime format style](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format) is used. +When given two or three arguments where the first argument is a value of type [Integer](../data-types/int-uint.md), [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md), the second argument is a constant format string and the third argument is an optional constant time zone string, the function returns a value of type [String](../data-types/string.md#string), i.e. it behaves like [formatDateTime](#formatdatetime). In this case, [MySQL's datetime format style](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format) is used. **Example:** @@ -3874,13 +3772,11 @@ toModifiedJulianDay(date) **Arguments** -- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `date` — Date in text form. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** -- Modified Julian Day number. - -Type: [Int32](../../sql-reference/data-types/int-uint.md). +- Modified Julian Day number. [Int32](../data-types/int-uint.md). **Example** @@ -3908,13 +3804,11 @@ toModifiedJulianDayOrNull(date) **Arguments** -- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `date` — Date in text form. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** -- Modified Julian Day number. - -Type: [Nullable(Int32)](../../sql-reference/data-types/int-uint.md). +- Modified Julian Day number. [Nullable(Int32)](../data-types/int-uint.md). **Example** @@ -3942,13 +3836,11 @@ fromModifiedJulianDay(day) **Arguments** -- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). +- `day` — Modified Julian Day number. [Any integral types](../data-types/int-uint.md). **Returned value** -- Date in text form. - -Type: [String](../../sql-reference/data-types/string.md) +- Date in text form. 
[String](../data-types/string.md) **Example** @@ -3976,13 +3868,11 @@ fromModifiedJulianDayOrNull(day) **Arguments** -- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). +- `day` — Modified Julian Day number. [Any integral types](../data-types/int-uint.md). **Returned value** -- Date in text form. - -Type: [Nullable(String)](../../sql-reference/data-types/string.md) +- Date in text form. [Nullable(String)](../data-types/string.md) **Example** @@ -4010,8 +3900,8 @@ toUTCTimestamp(time_val, time_zone) **Arguments** -- `time_val` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) -- `time_zone` — A String type const value or an expression represent the time zone. [String types](../../sql-reference/data-types/string.md) +- `time_val` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../data-types/datetime.md) +- `time_zone` — A String type const value or an expression represent the time zone. [String types](../data-types/string.md) **Returned value** @@ -4043,8 +3933,8 @@ fromUTCTimestamp(time_val, time_zone) **Arguments** -- `time_val` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) -- `time_zone` — A String type const value or an expression represent the time zone. [String types](../../sql-reference/data-types/string.md) +- `time_val` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../data-types/datetime.md) +- `time_zone` — A String type const value or an expression represent the time zone. [String types](../data-types/string.md) **Returned value** @@ -4075,8 +3965,8 @@ timeDiff(first_datetime, second_datetime) *Arguments** -- `first_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) -- `second_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) +- `first_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../data-types/datetime.md) +- `second_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../data-types/datetime.md) **Returned value** diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md index 5f3514049c7..a455d0af91b 100644 --- a/docs/en/sql-reference/functions/distance-functions.md +++ b/docs/en/sql-reference/functions/distance-functions.md @@ -20,13 +20,11 @@ Alias: `normL1`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. - -Type: [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Examples** @@ -58,13 +56,11 @@ Alias: `normL2`. 
**Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). - -Type: [Float](../../sql-reference/data-types/float.md). +- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). [Float](../data-types/float.md). **Example** @@ -95,13 +91,11 @@ Alias: `normL2Squared`. ***Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- L2-norm squared. - -Type: [Float](../../sql-reference/data-types/float.md). +- L2-norm squared. [Float](../data-types/float.md). **Example** @@ -133,13 +127,11 @@ Alias: `normLinf`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- Linf-norm or the maximum absolute value. - -Type: [Float](../../sql-reference/data-types/float.md). +- Linf-norm or the maximum absolute value. [Float](../data-types/float.md). **Example** @@ -171,14 +163,12 @@ Alias: `normLp`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). +- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../data-types/int-uint.md) or [Float](../data-types/float.md). **Returned value** -- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm) - -Type: [Float](../../sql-reference/data-types/float.md). +- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm). [Float](../data-types/float.md). **Example** @@ -210,14 +200,12 @@ Alias: `distanceL1`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- 1-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- 1-norm distance. [Float](../data-types/float.md). **Example** @@ -249,14 +237,12 @@ Alias: `distanceL2`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- 2-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- 2-norm distance. [Float](../data-types/float.md). 
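+
+For intuition, `L2Distance(vector1, vector2)` equals `L2Norm` applied to the element-wise difference of the two vectors (the values below are arbitrary):
+
+```sql
+-- illustrative sketch: both expressions evaluate to 5
+SELECT L2Distance((1, 2), (4, 6)) AS distance, L2Norm((3, 4)) AS norm_of_difference;
+```
+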
**Example** @@ -288,12 +274,12 @@ Alias: `distanceL2Squared`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -Type: [Float](../../sql-reference/data-types/float.md). +- Sum of the squares of the difference between the corresponding elements of two vectors. [Float](../data-types/float.md). **Example** @@ -325,14 +311,12 @@ Alias: `distanceLinf`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector1` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector1` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- Infinity-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- Infinity-norm distance. [Float](../data-types/float.md). **Example** @@ -364,15 +348,13 @@ Alias: `distanceLp`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `p` — The power. Possible values: real number from `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `p` — The power. Possible values: real number from `[1; inf)`. [UInt](../data-types/int-uint.md) or [Float](../data-types/float.md). **Returned value** -- p-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- p-norm distance. [Float](../data-types/float.md). **Example** @@ -405,13 +387,11 @@ Alias: `normalizeL1`. **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple` — [Tuple](../data-types/tuple.md). **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md). **Example** @@ -443,13 +423,11 @@ Alias: `normalizeL1`. **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple` — [Tuple](../data-types/tuple.md). **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md). **Example** @@ -481,13 +459,11 @@ Alias: `normalizeLinf `. **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple` — [Tuple](../data-types/tuple.md). **Returned value** -- Unit vector. 
- -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md). **Example** @@ -519,14 +495,12 @@ Alias: `normalizeLp `. **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). -- `p` — The power. Possible values: any number from [1;inf). [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). +- `tuple` — [Tuple](../data-types/tuple.md). +- `p` — The power. Possible values: any number from [1;inf). [UInt](../data-types/int-uint.md) or [Float](../data-types/float.md). **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md). **Example** @@ -556,14 +530,12 @@ cosineDistance(vector1, vector2) **Arguments** -- `vector1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First tuple. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second tuple. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- Cosine of the angle between two vectors subtracted from one. - -Type: [Float](../../sql-reference/data-types/float.md). +- Cosine of the angle between two vectors subtracted from one. [Float](../data-types/float.md). **Examples** diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 4f6da764b3c..408b605727d 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -18,13 +18,11 @@ char(number_1, [number_2, ..., number_n]); **Arguments** -- `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md). +- `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../data-types/int-uint.md), [Float](../data-types/float.md). **Returned value** -- a string of given bytes. - -Type: `String`. +- a string of given bytes. [String](../data-types/string.md). **Example** @@ -88,23 +86,21 @@ The function is using uppercase letters `A-F` and not using any prefixes (like ` For integer arguments, it prints hex digits (“nibbles”) from the most significant to least significant (big-endian or “human-readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints both digits of every byte even if the leading digit is zero. -Values of type [Date](../../sql-reference/data-types/date.md) and [DateTime](../../sql-reference/data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for Date and the value of Unix Timestamp for DateTime). +Values of type [Date](../data-types/date.md) and [DateTime](../data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for Date and the value of Unix Timestamp for DateTime). 
-For [String](../../sql-reference/data-types/string.md) and [FixedString](../../sql-reference/data-types/fixedstring.md), all bytes are simply encoded as two hexadecimal numbers. Zero bytes are not omitted. +For [String](../data-types/string.md) and [FixedString](../data-types/fixedstring.md), all bytes are simply encoded as two hexadecimal numbers. Zero bytes are not omitted. -Values of [Float](../../sql-reference/data-types/float.md) and [Decimal](../../sql-reference/data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. +Values of [Float](../data-types/float.md) and [Decimal](../data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order string. **Arguments** -- `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `arg` — A value to convert to hexadecimal. Types: [String](../data-types/string.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md), [Decimal](../data-types/decimal.md), [Date](../data-types/date.md) or [DateTime](../data-types/datetime.md). **Returned value** -- A string with the hexadecimal representation of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string with the hexadecimal representation of the argument. [String](../data-types/string.md). **Examples** @@ -185,15 +181,13 @@ unhex(arg) **Arguments** -- `arg` — A string containing any number of hexadecimal digits. Type: [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md). +- `arg` — A string containing any number of hexadecimal digits. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md). Supports both uppercase and lowercase letters `A-F`. The number of hexadecimal digits does not have to be even. If it is odd, the last digit is interpreted as the least significant half of the `00-0F` byte. If the argument string contains anything other than hexadecimal digits, some implementation-defined result is returned (an exception isn’t thrown). For a numeric argument the inverse of hex(N) is not performed by unhex(). **Returned value** -- A binary string (BLOB). - -Type: [String](../../sql-reference/data-types/string.md). +- A binary string (BLOB). [String](../data-types/string.md). **Example** @@ -237,23 +231,21 @@ Alias: `BIN`. For integer arguments, it prints bin digits from the most significant to least significant (big-endian or “human-readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints eight digits of every byte if the leading digit is zero. -Values of type [Date](../../sql-reference/data-types/date.md) and [DateTime](../../sql-reference/data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for `Date` and the value of Unix Timestamp for `DateTime`). 
+Values of type [Date](../data-types/date.md) and [DateTime](../data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for `Date` and the value of Unix Timestamp for `DateTime`). -For [String](../../sql-reference/data-types/string.md) and [FixedString](../../sql-reference/data-types/fixedstring.md), all bytes are simply encoded as eight binary numbers. Zero bytes are not omitted. +For [String](../data-types/string.md) and [FixedString](../data-types/fixedstring.md), all bytes are simply encoded as eight binary numbers. Zero bytes are not omitted. -Values of [Float](../../sql-reference/data-types/float.md) and [Decimal](../../sql-reference/data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. +Values of [Float](../data-types/float.md) and [Decimal](../data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order string. **Arguments** -- `arg` — A value to convert to binary. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md), or [DateTime](../../sql-reference/data-types/datetime.md). +- `arg` — A value to convert to binary. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md), [Decimal](../data-types/decimal.md), [Date](../data-types/date.md), or [DateTime](../data-types/datetime.md). **Returned value** -- A string with the binary representation of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string with the binary representation of the argument. [String](../data-types/string.md). **Examples** @@ -338,13 +330,11 @@ Supports binary digits `0` and `1`. The number of binary digits does not have to **Arguments** -- `arg` — A string containing any number of binary digits. [String](../../sql-reference/data-types/string.md). +- `arg` — A string containing any number of binary digits. [String](../data-types/string.md). **Returned value** -- A binary string (BLOB). - -Type: [String](../../sql-reference/data-types/string.md). +- A binary string (BLOB). [String](../data-types/string.md). **Examples** @@ -396,13 +386,11 @@ bitPositionsToArray(arg) **Arguments** -- `arg` — Integer value. [Int/UInt](../../sql-reference/data-types/int-uint.md). +- `arg` — Integer value. [Int/UInt](../data-types/int-uint.md). **Returned value** -- An array containing a list of positions of bits that equal `1`, in ascending order. - -Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- An array containing a list of positions of bits that equal `1`, in ascending order. [Array](../data-types/array.md)([UInt64](../data-types/int-uint.md)). **Example** @@ -454,13 +442,11 @@ mortonEncode(args) **Parameters** -- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. 
+- `args`: up to 8 [unsigned integers](../data-types/int-uint.md) or columns of the aforementioned type. **Returned value** -- A UInt64 code - -Type: [UInt64](../../sql-reference/data-types/int-uint.md) +- A UInt64 code. [UInt64](../data-types/int-uint.md) **Example** @@ -477,7 +463,7 @@ Result: ### Expanded mode -Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments. +Accepts a range mask ([tuple](../data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../data-types/int-uint.md) as other arguments. Each number in the mask configures the amount of range expansion:
1 - no expansion<br/>
@@ -494,15 +480,13 @@ mortonEncode(range_mask, args) **Parameters** - `range_mask`: 1-8. -- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. +- `args`: up to 8 [unsigned integers](../data-types/int-uint.md) or columns of the aforementioned type. Note: when using columns for `args` the provided `range_mask` tuple should still be a constant. **Returned value** -- A UInt64 code - -Type: [UInt64](../../sql-reference/data-types/int-uint.md) +- A UInt64 code. [UInt64](../data-types/int-uint.md) **Example** @@ -595,7 +579,7 @@ Result: **implementation details** -Please note that you can fit only so many bits of information into Morton code as [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero. +Please note that you can fit only so many bits of information into Morton code as [UInt64](../data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero. ## mortonDecode @@ -617,13 +601,11 @@ mortonDecode(tuple_size, code) **Parameters** - `tuple_size`: integer value no more than 8. -- `code`: [UInt64](../../sql-reference/data-types/int-uint.md) code. +- `code`: [UInt64](../data-types/int-uint.md) code. **Returned value** -- [tuple](../../sql-reference/data-types/tuple.md) of the specified size. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md) +- [tuple](../data-types/tuple.md) of the specified size. [UInt64](../data-types/int-uint.md) **Example** diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index 00c9ef376d3..5d82e26eb32 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -30,15 +30,15 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad]) **Arguments** -- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). -- `plaintext` — Text that need to be encrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string). -- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string). +- `mode` — Encryption mode. [String](../data-types/string.md#string). +- `plaintext` — Text that need to be encrypted. [String](../data-types/string.md#string). +- `key` — Encryption key. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../data-types/string.md#string). +- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../data-types/string.md#string). **Returned value** -- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string). +- Ciphertext binary string. [String](../data-types/string.md#string). 
**Examples** @@ -123,14 +123,14 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv]) **Arguments** -- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). -- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Optional, only first 16 bytes are taken into account [String](../../sql-reference/data-types/string.md#string). +- `mode` — Encryption mode. [String](../data-types/string.md#string). +- `plaintext` — Text that needs to be encrypted. [String](../data-types/string.md#string). +- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Optional, only first 16 bytes are taken into account [String](../data-types/string.md#string). **Returned value** -- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string). +- Ciphertext binary string. [String](../data-types/string.md#string). **Examples** @@ -230,15 +230,15 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad]) **Arguments** -- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). -- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Required for `-gcm` modes, Optional for others. [String](../../sql-reference/data-types/string.md#string). -- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string). +- `mode` — Decryption mode. [String](../data-types/string.md#string). +- `ciphertext` — Encrypted text that needs to be decrypted. [String](../data-types/string.md#string). +- `key` — Decryption key. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Required for `-gcm` modes, Optional for others. [String](../data-types/string.md#string). +- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception. [String](../data-types/string.md#string). **Returned value** -- Decrypted String. [String](../../sql-reference/data-types/string.md#string). +- Decrypted String. [String](../data-types/string.md#string). **Examples** @@ -361,14 +361,14 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) **Arguments** -- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). -- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string). +- `mode` — Decryption mode. [String](../data-types/string.md#string). +- `ciphertext` — Encrypted text that needs to be decrypted. [String](../data-types/string.md#string). +- `key` — Decryption key. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Optional. [String](../data-types/string.md#string). 
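A hedged round-trip sketch of the two MySQL-compatible functions touched in this hunk; the `aes-256-ofb` mode, the key, and the `iv` literals are illustrative assumptions rather than values from the patch. The decrypted result is what **Returned value** below describes.

```sql
-- Hypothetical round trip: the over-long key exercises MySQL-style key folding,
-- and the 16-byte iv is required by the OFB mode chosen here.
SELECT aes_decrypt_mysql(
    'aes-256-ofb',
    aes_encrypt_mysql('aes-256-ofb', 'Secret', 'a-deliberately-longer-than-32-byte-key', 'iviviviviviviviv'),
    'a-deliberately-longer-than-32-byte-key',
    'iviviviviviviviv') AS plaintext;
```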
**Returned value** -- Decrypted String. [String](../../sql-reference/data-types/string.md#string). +- Decrypted String. [String](../data-types/string.md#string). **Examples** diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 4149afce044..82c21ce40c8 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -25,9 +25,9 @@ dictGetOrNull('dict_name', attr_name, id_expr) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. -- `default_value_expr` — Values returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) or [Tuple](../../sql-reference/data-types/tuple.md)([Expression](../../sql-reference/syntax.md#syntax-expressions)), returning the value (or values) in the data types configured for the `attr_names` attribute. +- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. +- `default_value_expr` — Values returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) or [Tuple](../data-types/tuple.md)([Expression](../../sql-reference/syntax.md#syntax-expressions)), returning the value (or values) in the data types configured for the `attr_names` attribute. **Returned value** @@ -239,14 +239,12 @@ dictHas('dict_name', id_expr) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. **Returned value** -- 0, if there is no key. -- 1, if there is a key. - -Type: `UInt8`. +- 0, if there is no key. [UInt8](../data-types/int-uint.md). +- 1, if there is a key. [UInt8](../data-types/int-uint.md). ## dictGetHierarchy @@ -261,13 +259,11 @@ dictGetHierarchy('dict_name', key) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. 
[Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. **Returned value** -- Parents for the key. - -Type: [Array(UInt64)](../../sql-reference/data-types/array.md). +- Parents for the key. [Array(UInt64)](../data-types/array.md). ## dictIsIn @@ -280,15 +276,13 @@ dictIsIn('dict_name', child_id_expr, ancestor_id_expr) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. -- `ancestor_id_expr` — Alleged ancestor of the `child_id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. +- `ancestor_id_expr` — Alleged ancestor of the `child_id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. **Returned value** -- 0, if `child_id_expr` is not a child of `ancestor_id_expr`. -- 1, if `child_id_expr` is a child of `ancestor_id_expr` or if `child_id_expr` is an `ancestor_id_expr`. - -Type: `UInt8`. +- 0, if `child_id_expr` is not a child of `ancestor_id_expr`. [UInt8](../data-types/int-uint.md). +- 1, if `child_id_expr` is a child of `ancestor_id_expr` or if `child_id_expr` is an `ancestor_id_expr`. [UInt8](../data-types/int-uint.md). ## dictGetChildren @@ -303,13 +297,11 @@ dictGetChildren(dict_name, key) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. **Returned values** -- First-level descendants for the key. - -Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- First-level descendants for the key. [Array](../data-types/array.md)([UInt64](../data-types/int-uint.md)). **Example** @@ -352,14 +344,12 @@ dictGetDescendants(dict_name, key, level) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. -- `level` — Hierarchy level. If `level = 0` returns all descendants to the end. [UInt8](../../sql-reference/data-types/int-uint.md). +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. +- `level` — Hierarchy level. If `level = 0` returns all descendants to the end. [UInt8](../data-types/int-uint.md). **Returned values** -- Descendants for the key. 
- -Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- Descendants for the key. [Array](../data-types/array.md)([UInt64](../data-types/int-uint.md)). **Example** @@ -419,8 +409,8 @@ dictGetAll('dict_name', attr_names, id_expr[, limit]) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning array of dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning array of dictionary key-type value or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. - `limit` - Maximum length for each value array returned. When truncating, child nodes are given precedence over parent nodes, and otherwise the defined list order for the regexp tree dictionary is respected. If unspecified, array length is unlimited. **Returned value** @@ -509,7 +499,7 @@ dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md) or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. - `default_value_expr` — Value returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning the value in the data type configured for the `attr_name` attribute. **Returned value** diff --git a/docs/en/sql-reference/functions/files.md b/docs/en/sql-reference/functions/files.md index d62cd1db88d..ac9e21cd416 100644 --- a/docs/en/sql-reference/functions/files.md +++ b/docs/en/sql-reference/functions/files.md @@ -19,7 +19,7 @@ file(path[, default]) **Arguments** - `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings. -- `default` — The value returned if the file does not exist or cannot be accessed. 
Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). +- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). **Example** diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 90520145b9d..a0dfbebc8ae 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -351,7 +351,7 @@ Result: ## assumeNotNull -Returns the corresponding non-`Nullable` value for a value of [Nullable](../../sql-reference/data-types/nullable.md) type. If the original value is `NULL`, an arbitrary result can be returned. See also functions `ifNull` and `coalesce`. +Returns the corresponding non-`Nullable` value for a value of [Nullable](../data-types/nullable.md) type. If the original value is `NULL`, an arbitrary result can be returned. See also functions `ifNull` and `coalesce`. ``` sql assumeNotNull(x) diff --git a/docs/en/sql-reference/functions/geo/coordinates.md b/docs/en/sql-reference/functions/geo/coordinates.md index 1cbc1933206..d10573b8995 100644 --- a/docs/en/sql-reference/functions/geo/coordinates.md +++ b/docs/en/sql-reference/functions/geo/coordinates.md @@ -152,8 +152,8 @@ pointInPolygon((x, y), [(a, b), (c, d) ...], ...) **Input values** -- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../../sql-reference/data-types/tuple.md) — A tuple of two numbers. -- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../../sql-reference/data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant. +- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../data-types/tuple.md) — A tuple of two numbers. +- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant. - The function also supports polygons with holes (cut out sections). In this case, add polygons that define the cut out sections using additional arguments of the function. The function does not support non-simply-connected polygons. **Returned values** diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index ce16af44e90..8abc8006e5d 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -74,11 +74,11 @@ geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precisi **Arguments** -- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `precision` — Geohash precision. Range: `[1, 12]`. 
Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. [Float](../../data-types/float.md). +- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. [Float](../../data-types/float.md). +- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. [Float](../../data-types/float.md). +- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. [Float](../../data-types/float.md). +- `precision` — Geohash precision. Range: `[1, 12]`. [UInt8](../../data-types/int-uint.md). :::note All coordinate parameters must be of the same type: either `Float32` or `Float64`. @@ -86,11 +86,9 @@ All coordinate parameters must be of the same type: either `Float32` or `Float64 **Returned values** -- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items. +- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items. [Array](../../data-types/array.md)([String](../../data-types/string.md)). - `[]` - Empty array if minimum latitude and longitude values aren’t less than corresponding maximum values. -Type: [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)). - :::note Function throws an exception if resulting array is over 10’000’000 items long. ::: diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 29486c58e6a..bcdd457964a 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -26,14 +26,12 @@ h3IsValid(h3index) **Parameter** -- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned values** -- 1 — The number is a valid H3 index. -- 0 — The number is not a valid H3 index. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- 1 — The number is a valid H3 index. [UInt8](../../data-types/int-uint.md). +- 0 — The number is not a valid H3 index. [UInt8](../../data-types/int-uint.md). **Example** @@ -63,14 +61,12 @@ h3GetResolution(h3index) **Parameter** -- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned values** -- Index resolution. Range: `[0, 15]`. -- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). +- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index. [UInt8](../../data-types/int-uint.md). **Example** @@ -100,11 +96,11 @@ h3EdgeAngle(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in grades. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in grades. [Float64](../../data-types/float.md). **Example** @@ -134,11 +130,11 @@ h3EdgeLengthM(resolution) **Parameter** -- `resolution` — Index resolution. 
Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in meters. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in meters. [Float64](../../data-types/float.md). **Example** @@ -168,11 +164,11 @@ h3EdgeLengthKm(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in kilometers. [Float64](../../data-types/float.md). **Example** @@ -202,16 +198,14 @@ geoToH3(lon, lat, resolution) **Arguments** -- `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `lon` — Longitude. [Float64](../../data-types/float.md). +- `lat` — Latitude. [Float64](../../data-types/float.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned values** -- Hexagon index number. -- 0 in case of error. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Hexagon index number. [UInt64](../../data-types/int-uint.md). +- 0 in case of error. [UInt64](../../data-types/int-uint.md). **Example** @@ -241,11 +235,11 @@ h3ToGeo(h3Index) **Arguments** -- `h3Index` — H3 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3Index` — H3 Index. [UInt64](../../data-types/int-uint.md). **Returned values** -- A tuple consisting of two values: `tuple(lon,lat)`. `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). +- A tuple consisting of two values: `tuple(lon,lat)`. `lon` — Longitude. [Float64](../../data-types/float.md). `lat` — Latitude. [Float64](../../data-types/float.md). **Example** @@ -275,12 +269,11 @@ h3ToGeoBoundary(h3Index) **Arguments** -- `h3Index` — H3 Index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3Index` — H3 Index. [UInt64](../../data-types/int-uint.md). **Returned values** -- Array of pairs '(lon, lat)'. -Type: [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). +- Array of pairs '(lon, lat)'. [Array](../../data-types/array.md)([Float64](../../data-types/float.md), [Float64](../../data-types/float.md)). **Example** @@ -311,14 +304,12 @@ h3kRing(h3index, k) **Arguments** -- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Radius. Type: [integer](../../../sql-reference/data-types/int-uint.md) +- `h3index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `k` — Radius. [integer](../../data-types/int-uint.md) **Returned values** -- Array of H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). 
+- Array of H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -354,13 +345,11 @@ h3GetBaseCell(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Hexagon base cell number. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- Hexagon base cell number. [UInt8](../../data-types/int-uint.md). **Example** @@ -390,13 +379,11 @@ h3HexAreaM2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Area in square meters. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Area in square meters. [Float64](../../data-types/float.md). **Example** @@ -426,13 +413,11 @@ h3HexAreaKm2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Area in square kilometers. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Area in square kilometers. [Float64](../../data-types/float.md). **Example** @@ -462,15 +447,13 @@ h3IndexesAreNeighbors(index1, index2) **Arguments** -- `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index1` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `index2` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — Indexes are neighbours. -- `0` — Indexes are not neighbours. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Indexes are neighbours. [UInt8](../../data-types/int-uint.md). +- `0` — Indexes are not neighbours. [UInt8](../../data-types/int-uint.md). **Example** @@ -500,14 +483,12 @@ h3ToChildren(index, resolution) **Arguments** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned values** -- Array of the child H3-indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of the child H3-indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -537,14 +518,12 @@ h3ToParent(index, resolution) **Arguments** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Parent H3 index. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Parent H3 index. [UInt64](../../data-types/int-uint.md). 
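A minimal sketch of the coarsening `h3ToParent` performs; the coordinates and target resolution are placeholder values, not taken from the surrounding patch.

```sql
-- Hypothetical values: take a fine-grained cell from geoToH3 and coarsen it to resolution 3.
SELECT h3ToParent(geoToH3(37.79506683, 55.71290588, 15), 3) AS coarser_index;
```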
**Example** @@ -572,13 +551,11 @@ h3ToString(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- String representation of the H3 index. - -Type: [String](../../../sql-reference/data-types/string.md). +- String representation of the H3 index. [String](../../data-types/string.md). **Example** @@ -608,11 +585,11 @@ stringToH3(index_str) **Parameter** -- `index_str` — String representation of the H3 index. Type: [String](../../../sql-reference/data-types/string.md). +- `index_str` — String representation of the H3 index. [String](../../data-types/string.md). **Returned value** -- Hexagon index number. Returns 0 on error. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Hexagon index number. Returns 0 on error. [UInt64](../../data-types/int-uint.md). **Example** @@ -642,11 +619,11 @@ h3GetResolution(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Example** @@ -676,14 +653,12 @@ h3IsResClassIII(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — Index has a resolution with Class III orientation. -- `0` — Index doesn't have a resolution with Class III orientation. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Index has a resolution with Class III orientation. [UInt8](../../data-types/int-uint.md). +- `0` — Index doesn't have a resolution with Class III orientation. [UInt8](../../data-types/int-uint.md). **Example** @@ -713,14 +688,12 @@ h3IsPentagon(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — Index represents a pentagonal cell. -- `0` — Index doesn't represent a pentagonal cell. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Index represents a pentagonal cell. [UInt8](../../data-types/int-uint.md). +- `0` — Index doesn't represent a pentagonal cell. [UInt8](../../data-types/int-uint.md). **Example** @@ -750,13 +723,11 @@ h3GetFaces(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned values** -- Array containing icosahedron faces intersected by a given H3 index. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array containing icosahedron faces intersected by a given H3 index. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -786,13 +757,11 @@ h3CellAreaM2(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Cell area in square meters. 
- -Type: [Float64](../../../sql-reference/data-types/float.md). +- Cell area in square meters. [Float64](../../data-types/float.md). **Example** @@ -822,13 +791,11 @@ h3CellAreaRads2(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Cell area in square radians. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Cell area in square radians. [Float64](../../data-types/float.md). **Example** @@ -858,14 +825,12 @@ h3ToCenterChild(index, resolution) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned values** -- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. [UInt64](../../data-types/int-uint.md). **Example** @@ -895,13 +860,11 @@ h3ExactEdgeLengthM(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Exact edge length in meters. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in meters. [Float64](../../data-types/float.md). **Example** @@ -931,13 +894,11 @@ h3ExactEdgeLengthKm(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Exact edge length in kilometers. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in kilometers. [Float64](../../data-types/float.md). **Example** @@ -967,13 +928,11 @@ h3ExactEdgeLengthRads(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Exact edge length in radians. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in radians. [Float64](../../data-types/float.md). **Example** @@ -1003,13 +962,11 @@ h3NumHexagons(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Number of H3 indices. - -Type: [Int64](../../../sql-reference/data-types/int-uint.md). +- Number of H3 indices. [Int64](../../data-types/int-uint.md). **Example** @@ -1039,14 +996,12 @@ h3PointDistM(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. 
[Float64](../../data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../data-types/float.md). **Returned values** -- Haversine or great circle distance in meters. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in meters.[Float64](../../data-types/float.md). **Example** @@ -1076,14 +1031,12 @@ h3PointDistKm(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../data-types/float.md). **Returned values** -- Haversine or great circle distance in kilometers. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in kilometers. [Float64](../../data-types/float.md). **Example** @@ -1113,14 +1066,12 @@ h3PointDistRads(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../data-types/float.md). **Returned values** -- Haversine or great circle distance in radians. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in radians. [Float64](../../data-types/float.md). **Example** @@ -1150,9 +1101,7 @@ h3GetRes0Indexes() **Returned values** -- Array of all the resolution 0 H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of all the resolution 0 H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1183,13 +1132,11 @@ h3GetPentagonIndexes(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Array of all pentagon H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of all pentagon H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1219,14 +1166,12 @@ h3Line(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. [UInt64](../../data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. [UInt64](../../data-types/int-uint.md). 
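As a hedged sketch of the `h3Line` call shape (the returned array is described just below), with arbitrary placeholder coordinates; both endpoints are deliberately produced at the same resolution.

```sql
-- Hypothetical endpoints: two nearby cells at resolution 8; the result is the chain of cells between them.
SELECT h3Line(geoToH3(37.6173, 55.7558, 8), geoToH3(37.6273, 55.7658, 8)) AS line_cells;
```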
**Returned value** -Array of h3 indexes representing the line of indices between the two provided indices: - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +Array of h3 indexes representing the line of indices between the two provided indices. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1256,14 +1201,12 @@ h3Distance(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. [UInt64](../../data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. [UInt64](../../data-types/int-uint.md). **Returned value** -- Number of grid cells. - -Type: [Int64](../../../sql-reference/data-types/int-uint.md). +- Number of grid cells. [Int64](../../data-types/int-uint.md). Returns a negative number if finding the distance fails. @@ -1297,14 +1240,12 @@ h3HexRing(index, k) **Parameter** -- `index` — Hexagon index number that represents the origin. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Distance. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents the origin. [UInt64](../../data-types/int-uint.md). +- `k` — Distance. [UInt64](../../data-types/int-uint.md). **Returned values** -- Array of H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1334,14 +1275,12 @@ h3GetUnidirectionalEdge(originIndex, destinationIndex) **Parameter** -- `originIndex` — Origin Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `destinationIndex` — Destination Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `originIndex` — Origin Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `destinationIndex` — Destination Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Unidirectional Edge Hexagon Index number. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Unidirectional Edge Hexagon Index number. [UInt64](../../data-types/int-uint.md). **Example** @@ -1371,14 +1310,12 @@ h3UnidirectionalEdgeisValid(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- 1 — The H3 index is a valid unidirectional edge. -- 0 — The H3 index is not a valid unidirectional edge. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- 1 — The H3 index is a valid unidirectional edge. [UInt8](../../data-types/int-uint.md). +- 0 — The H3 index is not a valid unidirectional edge. [UInt8](../../data-types/int-uint.md). **Example** @@ -1408,13 +1345,11 @@ h3GetOriginIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. 
[UInt64](../../data-types/int-uint.md). **Returned value** -- Origin Hexagon Index number. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Origin Hexagon Index number. [UInt64](../../data-types/int-uint.md). **Example** @@ -1444,13 +1379,11 @@ h3GetDestinationIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -- Destination Hexagon Index number. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Destination Hexagon Index number. [UInt64](../../data-types/int-uint.md). **Example** @@ -1480,14 +1413,14 @@ h3GetIndexesFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** A tuple consisting of two values `tuple(origin,destination)`: -- `origin` — Origin Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `destination` — Destination Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `origin` — Origin Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `destination` — Destination Hexagon index number. [UInt64](../../data-types/int-uint.md). Returns `(0,0)` if the provided input is not valid. @@ -1519,13 +1452,11 @@ h3GetUnidirectionalEdgesFromHexagon(index) **Parameter** -- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -Array of h3 indexes representing each unidirectional edge: - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +Array of h3 indexes representing each unidirectional edge. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1555,12 +1486,11 @@ h3GetUnidirectionalEdgeBoundary(index) **Parameter** -- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -- Array of pairs '(lon, lat)'. - Type: [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). +- Array of pairs '(lon, lat)'. [Array](../../data-types/array.md)([Float64](../../data-types/float.md), [Float64](../../data-types/float.md)). **Example** diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md index f4702eff44b..3165b21318b 100644 --- a/docs/en/sql-reference/functions/geo/s2.md +++ b/docs/en/sql-reference/functions/geo/s2.md @@ -21,14 +21,12 @@ geoToS2(lon, lat) **Arguments** -- `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). -- `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). +- `lon` — Longitude. [Float64](../../data-types/float.md). +- `lat` — Latitude. 
[Float64](../../data-types/float.md). **Returned values** -- S2 point index. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- S2 point index. [UInt64](../../data-types/int-uint.md). **Example** @@ -58,13 +56,13 @@ s2ToGeo(s2index) **Arguments** -- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index` — S2 Index. [UInt64](../../data-types/int-uint.md). **Returned values** -- A tuple consisting of two values: `tuple(lon,lat)`. - -Type: `lon` — [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md). +- A [tuple](../../data-types/tuple.md) consisting of two values: + - `lon`. [Float64](../../data-types/float.md). + - `lat`. [Float64](../../data-types/float.md). **Example** @@ -94,13 +92,11 @@ s2GetNeighbors(s2index) **Arguments** -- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index` — S2 Index. [UInt64](../../data-types/int-uint.md). -**Returned values** +**Returned value** -- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -130,14 +126,12 @@ s2CellsIntersect(s2index1, s2index2) **Arguments** -- `siIndex1`, `s2index2` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `siIndex1`, `s2index2` — S2 Index. [UInt64](../../data-types/int-uint.md). -**Returned values** +**Returned value** -- 1 — If the cells intersect. -- 0 — If the cells don't intersect. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — If the cells intersect. [UInt8](../../data-types/int-uint.md). +- `0` — If the cells don't intersect. [UInt8](../../data-types/int-uint.md). **Example** @@ -167,16 +161,14 @@ s2CapContains(center, degrees, point) **Arguments** -- `center` — S2 point index corresponding to the cap. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `degrees` — Radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md). -- `point` — S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `center` — S2 point index corresponding to the cap. [UInt64](../../data-types/int-uint.md). +- `degrees` — Radius of the cap in degrees. [Float64](../../data-types/float.md). +- `point` — S2 point index. [UInt64](../../data-types/int-uint.md). -**Returned values** +**Returned value** -- 1 — If the cap contains the S2 point index. -- 0 — If the cap doesn't contain the S2 point index. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — If the cap contains the S2 point index. [UInt8](../../data-types/int-uint.md). +- `0` — If the cap doesn't contain the S2 point index. [UInt8](../../data-types/int-uint.md). **Example** @@ -206,13 +198,13 @@ s2CapUnion(center1, radius1, center2, radius2) **Arguments** -- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../data-types/int-uint.md). +- `radius1`, `radius2` — Radius of the two input caps in degrees. 
[Float64](../../data-types/float.md). **Returned values** -- `center` — S2 point index corresponding the center of the smallest cap containing the two input caps. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `radius` — Radius of the smallest cap containing the two input caps. Type: [Float64](../../../sql-reference/data-types/float.md). +- `center` — S2 point index corresponding the center of the smallest cap containing the two input caps. [UInt64](../../data-types/int-uint.md). +- `radius` — Radius of the smallest cap containing the two input caps. [Float64](../../data-types/float.md). **Example** @@ -242,14 +234,14 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point) **Arguments** -- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Point` — Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../data-types/int-uint.md). +- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Point` — Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../data-types/int-uint.md). **Returned values** -- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — Height S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md). +- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. [UInt64](../../data-types/int-uint.md). +- `s2PointHigh` — Height S2 cell id corresponding to the grown rectangle. [UInt64](../../data-types/float.md). **Example** @@ -279,14 +271,14 @@ s2RectContains(s2PointLow, s2PointHi, s2Point) **Arguments** -- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Point` — Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../data-types/int-uint.md). +- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Point` — Target S2 point index. [UInt64](../../data-types/int-uint.md). -**Returned values** +**Returned value** -- 1 — If the rectangle contains the given S2 point. -- 0 — If the rectangle doesn't contain the given S2 point. +- `1` — If the rectangle contains the given S2 point. +- `0` — If the rectangle doesn't contain the given S2 point. **Example** @@ -316,13 +308,13 @@ s2RectUnion(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi) **Arguments** -- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. 
[UInt64](../../data-types/int-uint.md). +- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../data-types/int-uint.md). **Returned values** -- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. [UInt64](../../data-types/int-uint.md). +- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. [UInt64](../../data-types/int-uint.md). **Example** @@ -352,13 +344,13 @@ s2RectIntersection(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2Poin **Arguments** -- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../data-types/int-uint.md). **Returned values** -- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../data-types/int-uint.md). +- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/geo/svg.md b/docs/en/sql-reference/functions/geo/svg.md index c565d1f9de7..320d4542fee 100644 --- a/docs/en/sql-reference/functions/geo/svg.md +++ b/docs/en/sql-reference/functions/geo/svg.md @@ -23,13 +23,11 @@ Aliases: `SVG`, `svg` **Returned value** -- The SVG representation of the geometry: +- The SVG representation of the geometry. [String](../../data-types/string). - SVG circle - SVG polygon - SVG path -Type: [String](../../data-types/string) - **Examples** **Circle** diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 1cd7eeb7c83..506114038f7 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -12,7 +12,7 @@ Simhash is a hash function, which returns close hash values for close (similar) ## halfMD5 -[Interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. 
+[Interprets](../functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. ```sql halfMD5(par1, ...) @@ -23,11 +23,11 @@ Consider using the [sipHash64](#siphash64) function instead. **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -61,7 +61,7 @@ sipHash64(par1,...) This is a cryptographic hash function. It works at least three times faster than the [MD5](#md5) hash function. -The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm: +The function [interprets](../functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm: 1. The first and the second hash value are concatenated to an array which is hashed. 2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. @@ -69,11 +69,11 @@ The function [interprets](/docs/en/sql-reference/functions/type-conversion-funct **Arguments** -The function takes a variable number of input parameters of any of the [supported data types](/docs/en/sql-reference/data-types/index.md). +The function takes a variable number of input parameters of any of the [supported data types](../data-types/index.md). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. Note that the calculated hash values may be equal for the same input values of different argument types. This affects for example integer types of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data. @@ -105,7 +105,7 @@ Same as [sipHash64](#siphash64), but the first argument is a tuple of two UInt64 **Returned value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -143,7 +143,7 @@ Same as for [sipHash64](#siphash64). 
**Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -183,7 +183,7 @@ Same as [sipHash128](#siphash128), but the first argument is a tuple of two UInt **Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -217,7 +217,7 @@ Same as for [sipHash128](#siphash128). **Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -251,7 +251,7 @@ Same as [sipHash128Reference](#siphash128reference), but the first argument is a **Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -283,11 +283,11 @@ Note that Google changed the algorithm of CityHash after it has been added to Cl **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Examples** @@ -321,7 +321,7 @@ It works faster than intHash32. Average quality. ## SHA1, SHA224, SHA256, SHA512, SHA512_256 -Calculates SHA-1, SHA-224, SHA-256, SHA-512, SHA-512-256 hash from a string and returns the resulting set of bytes as [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Calculates SHA-1, SHA-224, SHA-256, SHA-512, SHA-512-256 hash from a string and returns the resulting set of bytes as [FixedString](../data-types/fixedstring.md). **Syntax** @@ -337,17 +337,15 @@ Even in these cases, we recommend applying the function offline and pre-calculat **Arguments** -- `s` — Input string for SHA hash calculation. [String](/docs/en/sql-reference/data-types/string.md). +- `s` — Input string for SHA hash calculation. [String](../data-types/string.md). **Returned value** -- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). - -Type: [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). [FixedString](../data-types/fixedstring.md). 
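To make the fixed-width result sizes listed above concrete, a small illustrative check (a sketch, not taken from the original page):

```sql
-- Byte lengths match the FixedString sizes listed above: 20, 28, 32 and 64
SELECT
    length(SHA1('abc'))   AS sha1_bytes,
    length(SHA224('abc')) AS sha224_bytes,
    length(SHA256('abc')) AS sha256_bytes,
    length(SHA512('abc')) AS sha512_bytes;
```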
**Example** -Use the [hex](/docs/en/sql-reference/functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string. +Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string. Query: @@ -365,7 +363,7 @@ Result: ## BLAKE3 -Calculates BLAKE3 hash string and returns the resulting set of bytes as [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Calculates BLAKE3 hash string and returns the resulting set of bytes as [FixedString](../data-types/fixedstring.md). **Syntax** @@ -377,17 +375,15 @@ This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust **Arguments** -- s - input string for BLAKE3 hash calculation. [String](/docs/en/sql-reference/data-types/string.md). +- s - input string for BLAKE3 hash calculation. [String](../data-types/string.md). **Return value** -- BLAKE3 hash as a byte array with type FixedString(32). - -Type: [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +- BLAKE3 hash as a byte array with type FixedString(32). [FixedString](../data-types/fixedstring.md). **Example** -Use function [hex](/docs/en/sql-reference/functions/encoding-functions.md/#hex) to represent the result as a hex-encoded string. +Use function [hex](../functions/encoding-functions.md/#hex) to represent the result as a hex-encoded string. Query: ```sql @@ -423,11 +419,11 @@ These functions use the `Fingerprint64` and `Hash64` methods respectively from a **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -540,9 +536,7 @@ This is just [JavaHash](#javahash) with zeroed out sign bit. This function is us **Returned value** -A `Int32` data type hash value. - -Type: `hiveHash`. +- `hiveHash` hash value. [Int32](../data-types/int-uint.md). **Example** @@ -570,11 +564,11 @@ metroHash64(par1, ...) **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). 
For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -608,12 +602,12 @@ Alias: `yandexConsistentHash` (left for backwards compatibility sake). **Parameters** -- `input`: A UInt64-type key [UInt64](/docs/en/sql-reference/data-types/int-uint.md). -- `n`: Number of buckets. [UInt16](/docs/en/sql-reference/data-types/int-uint.md). +- `input`: A UInt64-type key [UInt64](../data-types/int-uint.md). +- `n`: Number of buckets. [UInt16](../data-types/int-uint.md). **Returned value** -- A [UInt16](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +- A [UInt16](../data-types/int-uint.md) data type hash value. **Implementation details** @@ -644,12 +638,12 @@ murmurHash2_64(par1, ...) **Arguments** -Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -- The `murmurHash2_32` function returns hash value having the [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type. -- The `murmurHash2_64` function returns hash value having the [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type. +- The `murmurHash2_32` function returns hash value having the [UInt32](../data-types/int-uint.md) data type. +- The `murmurHash2_64` function returns hash value having the [UInt64](../data-types/int-uint.md) data type. **Example** @@ -675,13 +669,11 @@ gccMurmurHash(par1, ...) **Arguments** -- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `par1, ...` — A variable number of parameters that can be any of the [supported data types](../data-types/index.md/#data_types). **Returned value** -- Calculated hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Calculated hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -714,13 +706,11 @@ MurmurHash(par1, ...) **Arguments** -- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `par1, ...` — A variable number of parameters that can be any of the [supported data types](../data-types/index.md/#data_types). **Returned value** -- Calculated hash value. - -Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md). +- Calculated hash value. [UInt32](../data-types/int-uint.md). 
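Regarding the caveat repeated in the Arguments sections above (the hash may coincide for the same value stored in integer types of different sizes), a quick way to observe the behaviour is a comparison query. Illustrative sketch only; no particular output is asserted:

```sql
-- Compare hashes of the same numeric value stored in different integer widths
SELECT
    sipHash64(toUInt8(1))  AS h8,
    sipHash64(toUInt32(1)) AS h32,
    h8 = h32               AS same;
```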
**Example** @@ -751,12 +741,12 @@ murmurHash3_64(par1, ...) **Arguments** -Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -- The `murmurHash3_32` function returns a [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. -- The `murmurHash3_64` function returns a [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +- The `murmurHash3_32` function returns a [UInt32](../data-types/int-uint.md) data type hash value. +- The `murmurHash3_64` function returns a [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -782,13 +772,11 @@ murmurHash3_128(expr) **Arguments** -- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions). [String](/docs/en/sql-reference/data-types/string.md). +- `expr` — A list of [expressions](../syntax.md/#syntax-expressions). [String](../data-types/string.md). **Returned value** -A 128-bit `MurmurHash3` hash value. - -Type: [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `MurmurHash3` hash value. [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -818,13 +806,11 @@ xxh3(expr) **Arguments** -- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions) of any data type. +- `expr` — A list of [expressions](../syntax.md/#syntax-expressions) of any data type. **Returned value** -A 64-bit `xxh3` hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +A 64-bit `xxh3` hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -856,9 +842,11 @@ SELECT xxHash64('') **Returned value** -A `UInt32` or `UInt64` data type hash value. +- Hash value. [UInt32/64](../data-types/int-uint.md). -Type: `UInt32` for `xxHash32` and `UInt64` for `xxHash64`. +:::note +The return type will be `UInt32` for `xxHash32` and `UInt64` for `xxHash64`. +::: **Example** @@ -884,7 +872,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. 
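A sketch of that near-duplicate workflow (illustrative only; the sample strings are hypothetical):

```sql
-- Rank candidate strings by Hamming distance between their n-gram simhashes
SELECT
    s,
    bitHammingDistance(
        ngramSimHash('ClickHouse is a column-oriented DBMS'),
        ngramSimHash(s)) AS dist
FROM
(
    SELECT arrayJoin([
        'ClickHouse is a column oriented DBMS',
        'PostgreSQL is a row-oriented DBMS']) AS s
)
ORDER BY dist ASC;
```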
**Syntax** @@ -894,14 +882,12 @@ ngramSimHash(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -923,7 +909,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -933,14 +919,12 @@ ngramSimHashCaseInsensitive(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -962,7 +946,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -972,14 +956,12 @@ ngramSimHashUTF8(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. 
- -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1001,7 +983,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1011,14 +993,12 @@ ngramSimHashCaseInsensitiveUTF8(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1040,7 +1020,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1050,14 +1030,12 @@ wordShingleSimHash(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1079,7 +1057,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). 
The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1089,14 +1067,12 @@ wordShingleSimHashCaseInsensitive(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1118,7 +1094,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1128,14 +1104,12 @@ wordShingleSimHashUTF8(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1157,7 +1131,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. 
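Case-insensitivity can be checked directly with an equality comparison; a sketch under the assumption that letter case is the only difference between the two inputs:

```sql
-- The case-insensitive variant is expected to give equal simhashes for differently cased input
SELECT
    wordShingleSimHashCaseInsensitiveUTF8('ClickHouse is a column-oriented database management system')
  = wordShingleSimHashCaseInsensitiveUTF8('CLICKHOUSE IS A COLUMN-ORIENTED DATABASE MANAGEMENT SYSTEM') AS equal;
```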
**Syntax** @@ -1167,14 +1141,12 @@ wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1204,13 +1176,11 @@ wyHash64(string) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `string` — String. [String](../data-types/string.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1232,7 +1202,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1242,15 +1212,13 @@ ngramMinHash(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1272,7 +1240,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. 
Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1282,15 +1250,13 @@ ngramMinHashCaseInsensitive(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1312,7 +1278,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1322,15 +1288,13 @@ ngramMinHashUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. 
[String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1352,7 +1316,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1362,15 +1326,13 @@ ngramMinHashCaseInsensitiveUTF8(string [, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1400,15 +1362,13 @@ ngramMinHashArg(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. 
Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1438,15 +1398,13 @@ ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1476,15 +1434,13 @@ ngramMinHashArgUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. 
[UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1514,15 +1470,13 @@ ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1544,7 +1498,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1554,15 +1508,13 @@ wordShingleMinHash(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. 
Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1584,7 +1536,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1594,15 +1546,13 @@ wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1624,7 +1574,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. 
-Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1634,15 +1584,13 @@ wordShingleMinHashUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1664,7 +1612,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1674,15 +1622,13 @@ wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. 
Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1712,15 +1658,13 @@ wordShingleMinHashArg(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1750,15 +1694,13 @@ wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. 
- -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1788,15 +1730,13 @@ wordShingleMinHashArgUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1826,15 +1766,13 @@ wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. 
[Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1872,7 +1810,7 @@ Alias: `sqid` **Returned Value** -A sqid [String](/docs/en/sql-reference/data-types/string.md). +A sqid [String](../data-types/string.md). **Example** @@ -1899,11 +1837,11 @@ sqidDecode(sqid) **Arguments** -- A sqid - [String](/docs/en/sql-reference/data-types/string.md) +- A sqid - [String](../data-types/string.md) **Returned Value** -The sqid transformed to numbers [Array(UInt64)](/docs/en/sql-reference/data-types/array.md). +The sqid transformed to numbers [Array(UInt64)](../data-types/array.md). **Example** diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index d07a5292431..c0256ba4735 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -11,7 +11,7 @@ There are at least\* two types of functions - regular functions (they are just c In this section we discuss regular functions. For aggregate functions, see the section “Aggregate functions”. :::note -There is a third type of function that the [‘arrayJoin’ function](/docs/en/sql-reference/functions/array-join.md) belongs to. And [table functions](/docs/en/sql-reference/table-functions/index.md) can also be mentioned separately. +There is a third type of function that the [‘arrayJoin’ function](../functions/array-join.md) belongs to. And [table functions](../table-functions/index.md) can also be mentioned separately. ::: ## Strong Typing @@ -63,4 +63,4 @@ For some functions the first argument (the lambda function) can be omitted. In t ## User Defined Functions (UDFs) -ClickHouse supports user-defined functions. See [UDFs](/docs/en/sql-reference/functions/udf.md). +ClickHouse supports user-defined functions. See [UDFs](../functions/udf.md). diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 1025b8bdc3d..bec97208843 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -36,16 +36,13 @@ addressToLine(address_of_binary_instruction) **Arguments** -- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. +- `address_of_binary_instruction` ([UInt64](../data-types/int-uint.md)) — Address of instruction in a running process. **Returned value** - Source code filename and the line number in this file delimited by colon. - For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number. - - Name of a binary, if the function couldn’t find the debug information. - - Empty string, if the address is not valid. Type: [String](../../sql-reference/data-types/string.md). @@ -117,9 +114,11 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so ## addressToLineWithInlines -Similar to `addressToLine`, but it will return an Array with all inline functions, and will be much slower as a price. +Similar to `addressToLine`, but returns an Array with all inline functions. As a result of this, it is slower than `addressToLine`. +:::note If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package. 
+::: **Syntax** @@ -129,17 +128,11 @@ addressToLineWithInlines(address_of_binary_instruction) **Arguments** -- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. +- `address_of_binary_instruction` ([UInt64](../data-types/int-uint.md)) — Address of instruction in a running process. **Returned value** -- Array which first element is source code filename and the line number in this file delimited by colon. And from second element, inline functions' source code filename and line number and function name are listed. - -- Array with single element which is name of a binary, if the function couldn’t find the debug information. - -- Empty array, if the address is not valid. - -Type: [Array(String)](../../sql-reference/data-types/array.md). +- An array whose first element is the source code filename and line number in the file delimited by a colon. From the second element onwards, inline functions' source code filenames, line numbers and function names are listed. If the function couldn’t find the debug information, then an array with a single element equal to the name of the binary is returned, otherwise an empty array is returned if the address is not valid. [Array(String)](../data-types/array.md). **Example** @@ -232,14 +225,12 @@ addressToSymbol(address_of_binary_instruction) **Arguments** -- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. +- `address_of_binary_instruction` ([UInt64](../data-types/int-uint.md)) — Address of instruction in a running process. **Returned value** -- Symbol from ClickHouse object files. -- Empty string, if the address is not valid. - -Type: [String](../../sql-reference/data-types/string.md). +- Symbol from ClickHouse object files. [String](../data-types/string.md). +- Empty string, if the address is not valid. [String](../data-types/string.md). **Example** @@ -329,14 +320,11 @@ demangle(symbol) **Arguments** -- `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol from an object file. +- `symbol` ([String](../data-types/string.md)) — Symbol from an object file. **Returned value** -- Name of the C++ function. -- Empty string if a symbol is not valid. - -Type: [String](../../sql-reference/data-types/string.md). +- Name of the C++ function, or an empty string if the symbol is not valid. [String](../data-types/string.md). **Example** @@ -425,7 +413,7 @@ tid() **Returned value** -- Current thread id. [Uint64](../../sql-reference/data-types/int-uint.md#uint-ranges). +- Current thread id. [Uint64](../data-types/int-uint.md#uint-ranges). **Example** @@ -455,7 +443,7 @@ logTrace('message') **Arguments** -- `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string). +- `message` — Message that is emitted to server log. [String](../data-types/string.md#string). **Returned value** diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index be20e02d77e..5b6a3aef2c8 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -147,13 +147,11 @@ IPv6StringToNum(string) **Argument** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../data-types/string.md). **Returned value** -- IPv6 address in binary format. 
- -Type: [FixedString(16)](../../sql-reference/data-types/fixedstring.md). +- IPv6 address in binary format. [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -248,7 +246,7 @@ SELECT IPv6CIDRToRange(toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32); ## toIPv4(string) -An alias to `IPv4StringToNum()` that takes a string form of IPv4 address and returns value of [IPv4](../../sql-reference/data-types/ipv4.md) type, which is binary equal to value returned by `IPv4StringToNum()`. +An alias to `IPv4StringToNum()` that takes a string form of IPv4 address and returns value of [IPv4](../data-types/ipv4.md) type, which is binary equal to value returned by `IPv4StringToNum()`. ``` sql WITH @@ -296,7 +294,7 @@ Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null ## toIPv6 -Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. +Converts a string form of IPv6 address to [IPv6](../data-types/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. Similar to [IPv6StringToNum](#ipv6stringtonums) function, which converts IPv6 address to binary format. If the input string contains a valid IPv4 address, then the IPv6 equivalent of the IPv4 address is returned. @@ -309,13 +307,11 @@ toIPv6(string) **Argument** -- `string` — IP address. [String](../../sql-reference/data-types/string.md) +- `string` — IP address. [String](../data-types/string.md) **Returned value** -- IP address. - -Type: [IPv6](../../sql-reference/data-types/ipv6.md). +- IP address. [IPv6](../data-types/ipv6.md). **Examples** @@ -370,13 +366,11 @@ isIPv4String(string) **Arguments** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../data-types/string.md). **Returned value** -- `1` if `string` is IPv4 address, `0` otherwise. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `string` is IPv4 address, `0` otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -408,13 +402,11 @@ isIPv6String(string) **Arguments** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../data-types/string.md). **Returned value** -- `1` if `string` is IPv6 address, `0` otherwise. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `string` is IPv6 address, `0` otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -449,14 +441,12 @@ This function accepts both IPv4 and IPv6 addresses (and networks) represented as **Arguments** -- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md). -- `prefix` — An IPv4 or IPv6 network prefix in CIDR. [String](../../sql-reference/data-types/string.md). +- `address` — An IPv4 or IPv6 address. [String](../data-types/string.md). +- `prefix` — An IPv4 or IPv6 network prefix in CIDR. [String](../data-types/string.md). **Returned value** -- `1` or `0`. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` or `0`. [UInt8](../data-types/int-uint.md). 
**Example** diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index e920ab82988..8359d5f9fbc 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -31,7 +31,7 @@ simpleJSONHas(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -71,7 +71,7 @@ simpleJSONExtractUInt(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -118,7 +118,7 @@ simpleJSONExtractInt(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -165,7 +165,7 @@ simpleJSONExtractFloat(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -212,7 +212,7 @@ simpleJSONExtractBool(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -259,12 +259,12 @@ simpleJSONExtractRaw(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** -It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise. +It returns the value of the field as a [`String`](../data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise. **Example** @@ -306,12 +306,12 @@ simpleJSONExtractString(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** -It returns the value of a field as a [`String`](../../sql-reference/data-types/string.md#string), including separators. The value is unescaped. 
It returns an empty `String`: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist. +It returns the value of a field as a [`String`](../data-types/string.md#string), including separators. The value is unescaped. It returns an empty `String`: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist. **Implementation details** @@ -386,7 +386,7 @@ SELECT isValidJSON('{"a": "hello", "b": [-100, 200.0, 300]}') = 1 SELECT isValidJSON('not a json') = 0 ``` -## JSONHas(json\[, indices_or_keys\]…) +## JSONHas(json\[, indices_or_keys\]...) If the value exists in the JSON document, `1` will be returned. @@ -419,7 +419,7 @@ SELECT JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'a' SELECT JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'hello' ``` -## JSONLength(json\[, indices_or_keys\]…) +## JSONLength(json\[, indices_or_keys\]...) Return the length of a JSON array or a JSON object. @@ -432,7 +432,7 @@ SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3 SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 2 ``` -## JSONType(json\[, indices_or_keys\]…) +## JSONType(json\[, indices_or_keys\]...) Return the type of a JSON value. @@ -446,13 +446,13 @@ SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String' SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array' ``` -## JSONExtractUInt(json\[, indices_or_keys\]…) +## JSONExtractUInt(json\[, indices_or_keys\]...) -## JSONExtractInt(json\[, indices_or_keys\]…) +## JSONExtractInt(json\[, indices_or_keys\]...) -## JSONExtractFloat(json\[, indices_or_keys\]…) +## JSONExtractFloat(json\[, indices_or_keys\]...) -## JSONExtractBool(json\[, indices_or_keys\]…) +## JSONExtractBool(json\[, indices_or_keys\]...) Parses a JSON and extract a value. These functions are similar to `visitParam` functions. @@ -466,7 +466,7 @@ SELECT JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200 SELECT JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300 ``` -## JSONExtractString(json\[, indices_or_keys\]…) +## JSONExtractString(json\[, indices_or_keys\]...) Parses a JSON and extract a string. This function is similar to `visitParamExtractString` functions. @@ -484,7 +484,7 @@ SELECT JSONExtractString('{"abc":"\\u263"}', 'abc') = '' SELECT JSONExtractString('{"abc":"hello}', 'abc') = '' ``` -## JSONExtract(json\[, indices_or_keys…\], Return_type) +## JSONExtract(json\[, indices_or_keys...\], Return_type) Parses a JSON and extract a value of the given ClickHouse data type. @@ -506,7 +506,7 @@ SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday' ``` -## JSONExtractKeysAndValues(json\[, indices_or_keys…\], Value_type) +## JSONExtractKeysAndValues(json\[, indices_or_keys...\], Value_type) Parses key-value pairs from a JSON where the values are of the given ClickHouse data type. @@ -528,14 +528,12 @@ JSONExtractKeys(json[, a, b, c...]) **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. -- `a, b, c...` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. 
Each argument can be either a [String](../../sql-reference/data-types/string.md) to get the field by the key or an [Integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. +- `json` — [String](../data-types/string.md) with valid JSON. +- `a, b, c...` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [String](../data-types/string.md) to get the field by the key or an [Integer](../data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. **Returned value** -Array with the keys of the JSON. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +Array with the keys of the JSON. [Array](../data-types/array.md)([String](../data-types/string.md)). **Example** @@ -554,7 +552,7 @@ text └────────────────────────────────────────────────────────────┘ ``` -## JSONExtractRaw(json\[, indices_or_keys\]…) +## JSONExtractRaw(json\[, indices_or_keys\]...) Returns a part of JSON as unparsed string. @@ -566,7 +564,7 @@ Example: SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = '[-100, 200.0, 300]'; ``` -## JSONExtractArrayRaw(json\[, indices_or_keys…\]) +## JSONExtractArrayRaw(json\[, indices_or_keys...\]) Returns an array with elements of JSON array, each represented as unparsed string. @@ -590,15 +588,13 @@ JSONExtractKeysAndValuesRaw(json[, p, a, t, h]) **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. -- `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../../sql-reference/data-types/string.md) to get the field by the key or an [integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. +- `json` — [String](../data-types/string.md) with valid JSON. +- `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../data-types/string.md) to get the field by the key or an [integer](../data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. **Returned values** -- Array with `('key', 'value')` tuples. Both tuple members are strings. -- Empty array if the requested object does not exist, or input JSON is invalid. - -Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). +- Array with `('key', 'value')` tuples. Both tuple members are strings. [Array](../data-types/array.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md), [String](../data-types/string.md)). +- Empty array if the requested object does not exist, or input JSON is invalid. [Array](../data-types/array.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md), [String](../data-types/string.md)). 
**Examples** @@ -723,9 +719,9 @@ Before version 21.11 the order of arguments was wrong, i.e. JSON_VALUE(path, jso ## toJSONString Serializes a value to its JSON representation. Various data types and nested structures are supported. -64-bit [integers](../../sql-reference/data-types/int-uint.md) or bigger (like `UInt64` or `Int128`) are enclosed in quotes by default. [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) controls this behavior. +64-bit [integers](../data-types/int-uint.md) or bigger (like `UInt64` or `Int128`) are enclosed in quotes by default. [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) controls this behavior. Special values `NaN` and `inf` are replaced with `null`. Enable [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals) setting to show them. -When serializing an [Enum](../../sql-reference/data-types/enum.md) value, the function outputs its name. +When serializing an [Enum](../data-types/enum.md) value, the function outputs its name. **Syntax** @@ -739,14 +735,12 @@ toJSONString(value) **Returned value** -- JSON representation of the value. - -Type: [String](../../sql-reference/data-types/string.md). +- JSON representation of the value. [String](../data-types/string.md). **Example** -The first example shows serialization of a [Map](../../sql-reference/data-types/map.md). -The second example shows some special values wrapped into a [Tuple](../../sql-reference/data-types/tuple.md). +The first example shows serialization of a [Map](../data-types/map.md). +The second example shows some special values wrapped into a [Tuple](../data-types/tuple.md). Query: @@ -782,13 +776,11 @@ Alias: `JSON_ARRAY_LENGTH(json)`. **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. +- `json` — [String](../data-types/string.md) with valid JSON. **Returned value** -- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL. - -Type: [Nullable(UInt64)](../../sql-reference/data-types/int-uint.md). +- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL. [Nullable(UInt64)](../data-types/int-uint.md). **Example** @@ -815,13 +807,11 @@ jsonMergePatch(json1, json2, ...) **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. +- `json` — [String](../data-types/string.md) with valid JSON. **Returned value** -- If JSON object strings are valid, return the merged JSON object string. - -Type: [String](../../sql-reference/data-types/string.md). +- If JSON object strings are valid, return the merged JSON object string. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 138b804a575..7222dbeeb0d 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -6,7 +6,7 @@ sidebar_label: Logical # Logical Functions -Below functions perform logical operations on arguments of arbitrary numeric types. They return either 0 or 1 as [UInt8](../../sql-reference/data-types/int-uint.md) or in some cases `NULL`. +Below functions perform logical operations on arguments of arbitrary numeric types. 
They return either 0 or 1 as [UInt8](../data-types/int-uint.md) or in some cases `NULL`. Zero as an argument is considered `false`, non-zero values are considered `true`. @@ -26,7 +26,7 @@ Alias: The [AND operator](../../sql-reference/operators/index.md#logical-and-ope **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** @@ -80,7 +80,7 @@ Alias: The [OR operator](../../sql-reference/operators/index.md#logical-or-opera **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** @@ -132,7 +132,7 @@ Alias: The [Negation operator](../../sql-reference/operators/index.md#logical-ne **Arguments** -- `val` — The value. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val` — The value. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** @@ -168,7 +168,7 @@ xor(val1, val2...) **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 945166056af..7f50fa933b6 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -18,7 +18,7 @@ e() **Returned value** -Type: [Float64](../../sql-reference/data-types/float.md). +Type: [Float64](../data-types/float.md). ## pi @@ -31,7 +31,7 @@ pi() ``` **Returned value** -Type: [Float64](../../sql-reference/data-types/float.md). +Type: [Float64](../data-types/float.md). ## exp @@ -45,11 +45,11 @@ exp(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). 
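**Example**

A minimal illustrative query: `exp(1)` is Euler's number, so a value of roughly `2.718281828459045` is expected (the exact output formatting may vary by client).

Query:

```sql
SELECT exp(1) AS e;
```

Result:

```response
┌─────────────────e─┐
│ 2.718281828459045 │
└───────────────────┘
```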
## log @@ -65,11 +65,11 @@ Alias: `ln(x)` **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## exp2 @@ -83,11 +83,11 @@ exp2(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## intExp2 @@ -111,11 +111,11 @@ log2(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## exp10 @@ -129,11 +129,11 @@ exp10(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## intExp10 @@ -157,11 +157,11 @@ log10(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## sqrt @@ -173,11 +173,11 @@ sqrt(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## cbrt @@ -189,11 +189,11 @@ cbrt(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## erf @@ -207,11 +207,11 @@ erf(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). 
+- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). **Example** @@ -239,11 +239,11 @@ erfc(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## lgamma @@ -257,11 +257,11 @@ lgamma(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## tgamma @@ -275,11 +275,11 @@ gamma(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## sin @@ -293,11 +293,11 @@ sin(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). **Example** @@ -323,11 +323,11 @@ cos(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## tan @@ -341,11 +341,11 @@ tan(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## asin @@ -359,11 +359,11 @@ asin(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). 
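**Example**

A brief illustrative query: `asin(1.0)` equals π/2, so a value of roughly `1.5707963267948966` is expected (the exact output formatting may vary by client).

Query:

```sql
SELECT asin(1.0) AS result;
```

Result:

```response
┌─────────────result─┐
│ 1.5707963267948966 │
└────────────────────┘
```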
## acos @@ -377,11 +377,11 @@ acos(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## atan @@ -395,11 +395,11 @@ atan(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## pow @@ -415,12 +415,12 @@ Alias: `power(x, y)` **Arguments** -- `x` - [(U)Int8/16/32/64](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) -- `y` - [(U)Int8/16/32/64](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) +- `x` - [(U)Int8/16/32/64](../data-types/int-uint.md) or [Float*](../data-types/float.md) +- `y` - [(U)Int8/16/32/64](../data-types/int-uint.md) or [Float*](../data-types/float.md) **Returned value** -Type: [Float64](../../sql-reference/data-types/float.md). +Type: [Float64](../data-types/float.md). ## cosh @@ -434,13 +434,13 @@ cosh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `1 <= cosh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -468,13 +468,13 @@ acosh(x) **Arguments** -- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - The angle, in radians. Values from the interval: `0 <= acosh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -502,13 +502,13 @@ sinh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `-∞ < sinh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). 
+Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -536,13 +536,13 @@ asinh(x) **Arguments** -- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - The angle, in radians. Values from the interval: `-∞ < asinh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -569,13 +569,13 @@ tanh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `-1 < tanh(x) < 1`. -Type: [Float*](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float*](../data-types/float.md#float32-float64). **Example** @@ -601,13 +601,13 @@ atanh(x) **Arguments** -- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - The angle, in radians. Values from the interval: `-∞ < atanh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -635,14 +635,14 @@ atan2(y, x) **Arguments** -- `y` — y-coordinate of the point through which the ray passes. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). -- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). +- `y` — y-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). +- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** - The angle `θ` such that `−π < θ ≤ π`, in radians. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -670,14 +670,14 @@ hypot(x, y) **Arguments** -- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). -- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). +- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). 
+- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** - The length of the hypotenuse of a right-angle triangle. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -705,13 +705,13 @@ log1p(x) **Arguments** -- `x` — Values from the interval: `-1 < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Values from the interval: `-1 < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `-∞ < log1p(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -747,7 +747,7 @@ sign(x) - 0 for `x = 0` - 1 for `x > 0` -Type: [Int8](../../sql-reference/data-types/int-uint.md). +Type: [Int8](../data-types/int-uint.md). **Examples** @@ -804,11 +804,11 @@ sigmoid(x) **Parameters** -- `x` — input value. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — input value. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Corresponding value along the sigmoid curve between 0 and 1. [Float64](../../sql-reference/data-types/float.md). +- Corresponding value along the sigmoid curve between 0 and 1. [Float64](../data-types/float.md). **Example** @@ -838,13 +838,11 @@ degrees(x) **Arguments** -- `x` — Input in radians. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Input in radians. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Value in degrees. - -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +- Value in degrees. [Float64](../data-types/float.md#float32-float64). **Example** @@ -872,13 +870,13 @@ radians(x) **Arguments** -- `x` — Input in degrees. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Input in degrees. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Value in radians. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -947,3 +945,49 @@ Result: │ 11 │ └──────────────────────────────────┘ ``` + +## proportionsZTest + +Returns test statistics for the two proportion Z-test - a statistical test for comparing the proportions from two populations `x` and `y`. + +**Syntax** + +```sql +proportionsZTest(successes_x, successes_y, trials_x, trials_y, conf_level, pool_type) +``` + +**Arguments** + +- `successes_x`: Number of successes in population `x`. [UInt64](../data-types/int-uint.md). +- `successes_y`: Number of successes in population `y`. [UInt64](../data-types/int-uint.md). 
+- `trials_x`: Number of trials in population `x`. [UInt64](../data-types/int-uint.md). +- `trials_y`: Number of trials in population `y`. [UInt64](../data-types/int-uint.md). +- `conf_level`: Confidence level for the test. [Float64](../data-types/float.md). +- `pool_type`: Selection of pooling (way in which the standard error is estimated). Can be either `unpooled` or `pooled`. [String](../data-types/string.md). + +:::note +For argument `pool_type`: In the pooled version, the two proportions are averaged, and only one proportion is used to estimate the standard error. In the unpooled version, the two proportions are used separately. +::: + +**Returned value** + +- `z_stat`: Z statistic. [Float64](../data-types/float.md). +- `p_val`: P value. [Float64](../data-types/float.md). +- `ci_low`: The lower confidence interval. [Float64](../data-types/float.md). +- `ci_high`: The upper confidence interval. [Float64](../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled'); +``` + +Result: + +```response +┌─proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled')───────────────────────────────┐ +│ (-0.20656724435948853,0.8363478437079654,-0.09345975390115283,0.07563797172293502) │ +└────────────────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md index 3e0458d226d..4bfa181a35f 100644 --- a/docs/en/sql-reference/functions/nlp-functions.md +++ b/docs/en/sql-reference/functions/nlp-functions.md @@ -23,7 +23,7 @@ stem('language', word) ### Arguments - `language` — Language which rules will be applied. Use the two letter [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). -- `word` — word that needs to be stemmed. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string). +- `word` — word that needs to be stemmed. Must be in lowercase. [String](../data-types/string.md#string). ### Examples @@ -88,8 +88,8 @@ lemmatize('language', word) ### Arguments -- `language` — Language which rules will be applied. [String](../../sql-reference/data-types/string.md#string). -- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../../sql-reference/data-types/string.md#string). +- `language` — Language which rules will be applied. [String](../data-types/string.md#string). +- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../data-types/string.md#string). ### Examples @@ -139,8 +139,8 @@ synonyms('extension_name', word) ### Arguments -- `extension_name` — Name of the extension in which search will be performed. [String](../../sql-reference/data-types/string.md#string). -- `word` — Word that will be searched in extension. [String](../../sql-reference/data-types/string.md#string). +- `extension_name` — Name of the extension in which search will be performed. [String](../data-types/string.md#string). +- `word` — Word that will be searched in extension. [String](../data-types/string.md#string). ### Examples @@ -188,7 +188,7 @@ detectLanguage('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). 
### Returned value @@ -226,7 +226,7 @@ detectLanguageMixed('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value @@ -262,7 +262,7 @@ detectLanguageUnknown('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value @@ -302,7 +302,7 @@ detectCharset('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 12b565d5358..dfe1224f7b8 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -6,11 +6,21 @@ sidebar_label: Other # Other Functions -## hostName() +## hostName Returns the name of the host on which this function was executed. If the function executes on a remote server (distributed processing), the remote server name is returned. If the function executes in the context of a distributed table, it generates a normal column with values relevant to each shard. Otherwise it produces a constant value. +**Syntax** + +```sql +hostName() +``` + +**Returned value** + +- Host name. [String](../data-types/string.md). + ## getMacro {#getMacro} Returns a named value from the [macros](../../operations/server-configuration-parameters/settings.md#macros) section of the server configuration. @@ -23,13 +33,11 @@ getMacro(name); **Arguments** -- `name` — Macro name to retrieve from the `` section. [String](../../sql-reference/data-types/string.md#string). +- `name` — Macro name to retrieve from the `` section. [String](../data-types/string.md#string). **Returned value** -- Value of the specified macro. - -Type: [String](../../sql-reference/data-types/string.md). +- Value of the specified macro. [String](../data-types/string.md). **Example** @@ -82,9 +90,7 @@ This function is case-insensitive. **Returned value** -- String with the fully qualified domain name. - -Type: `String`. +- String with the fully qualified domain name. [String](../data-types/string.md). **Example** @@ -110,7 +116,7 @@ basename(expr) **Arguments** -- `expr` — A value of type [String](../../sql-reference/data-types/string.md). Backslashes must be escaped. +- `expr` — A value of type [String](../data-types/string.md). Backslashes must be escaped. **Returned Value** @@ -163,34 +169,58 @@ Result: └────────────────┴────────────────────────────┘ ``` -## visibleWidth(x) +## visibleWidth Calculates the approximate width when outputting values to the console in text format (tab-separated). -This function is used by the system to implement Pretty formats. +This function is used by the system to implement [Pretty formats](../../interfaces/formats.md). `NULL` is represented as a string corresponding to `NULL` in `Pretty` formats. 
+**Syntax** + +```sql +visibleWidth(x) +``` + +**Example** + +Query: + ```sql SELECT visibleWidth(NULL) ``` +Result: + ```text ┌─visibleWidth(NULL)─┐ │ 4 │ └────────────────────┘ ``` -## toTypeName(x) +## toTypeName Returns the type name of the passed argument. If `NULL` is passed, then the function returns type `Nullable(Nothing)`, which corresponds to ClickHouse's internal `NULL` representation. -## blockSize() {#blockSize} +**Syntax** + +```sql +toTypeName(x) +``` + +## blockSize {#blockSize} In ClickHouse, queries are processed in blocks (chunks). This function returns the size (row count) of the block the function is called on. +**Syntax** + +```sql +blockSize() +``` + ## byteSize Returns an estimation of uncompressed byte size of its arguments in memory. @@ -207,13 +237,11 @@ byteSize(argument [, ...]) **Returned value** -- Estimation of byte size of the arguments in memory. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- Estimation of byte size of the arguments in memory. [UInt64](../data-types/int-uint.md). **Examples** -For [String](../../sql-reference/data-types/string.md) arguments, the function returns the string length + 9 (terminating zero + length). +For [String](../data-types/string.md) arguments, the function returns the string length + 9 (terminating zero + length). Query: @@ -288,16 +316,28 @@ Result: └────────────────────────────┘ ``` -## materialize(x) +## materialize Turns a constant into a full column containing a single value. Full columns and constants are represented differently in memory. Functions usually execute different code for normal and constant arguments, although the result should typically be the same. This function can be used to debug this behavior. -## ignore(…) +**Syntax** + +```sql +materialize(x) +``` + +## ignore Accepts any arguments, including `NULL` and does nothing. Always returns 0. The argument is internally still evaluated. Useful e.g. for benchmarks. +**Syntax** + +```sql +ignore(x) +``` + ## sleep Used to introduce a delay or pause in the execution of a query. It is primarily used for testing and debugging purposes. @@ -310,7 +350,7 @@ sleep(seconds) **Arguments** -- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. +- `seconds`: [UInt*](../data-types/int-uint.md) or [Float](../data-types/float.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. **Returned value** @@ -360,7 +400,7 @@ sleepEachRow(seconds) **Arguments** -- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. +- `seconds`: [UInt*](../data-types/int-uint.md) or [Float*](../data-types/float.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. 
**Returned value** @@ -392,27 +432,33 @@ The `sleepEachRow()` function is primarily used for testing and debugging purpos Like the [`sleep()` function](#sleep), it's important to use `sleepEachRow()` judiciously and only when necessary, as it can significantly impact the overall performance and responsiveness of your ClickHouse system, especially when dealing with large result sets. -## currentDatabase() +## currentDatabase Returns the name of the current database. Useful in table engine parameters of `CREATE TABLE` queries where you need to specify the database. -## currentUser() {#currentUser} +**Syntax** + +```sql +currentDatabase() +``` + +## currentUser {#currentUser} Returns the name of the current user. In case of a distributed query, the name of the user who initiated the query is returned. +**Syntax** + ```sql -SELECT currentUser(); +currentUser() ``` Aliases: `user()`, `USER()`, `current_user()`. Aliases are case insensitive. **Returned values** -- The name of the current user. -- In distributed queries, the login of the user who initiated the query. - -Type: `String`. +- The name of the current user. [String](../data-types/string.md). +- In distributed queries, the login of the user who initiated the query. [String](../data-types/string.md). **Example** @@ -448,10 +494,8 @@ isConstant(x) **Returned values** -- `1` if `x` is constant. -- `0` if `x` is non-constant. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `x` is constant. [UInt8](../data-types/int-uint.md). +- `0` if `x` is non-constant. [UInt8](../data-types/int-uint.md). **Examples** @@ -497,52 +541,6 @@ Result: └────────────────────┘ ``` -## isFinite(x) - -Returns 1 if the Float32 or Float64 argument not infinite and not a NaN, otherwise this function returns 0. - -## isInfinite(x) - -Returns 1 if the Float32 or Float64 argument is infinite, otherwise this function returns 0. Note that 0 is returned for a NaN. - -## ifNotFinite - -Checks whether a floating point value is finite. - -**Syntax** - -```sql -ifNotFinite(x,y) -``` - -**Arguments** - -- `x` — Value to check for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). -- `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md). - -**Returned value** - -- `x` if `x` is finite. -- `y` if `x` is not finite. - -**Example** - -Query: - - SELECT 1/0 as infimum, ifNotFinite(infimum,42) - -Result: - - ┌─infimum─┬─ifNotFinite(divide(1, 0), 42)─┐ - │ inf │ 42 │ - └─────────┴───────────────────────────────┘ - -You can get similar result by using the [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`. - -## isNaN(x) - -Returns 1 if the Float32 and Float64 argument is NaN, otherwise this function 0. - ## hasColumnInTable Given the database name, the table name, and the column name as constant strings, returns 1 if the given column exists, otherwise 0. @@ -733,11 +731,19 @@ LIMIT 10 └────────────────┴─────────┘ ``` -## formatReadableDecimalSize(x) +## formatReadableDecimalSize Given a size (number of bytes), this function returns a readable, rounded size with suffix (KB, MB, etc.) as string. 
-Example: +**Syntax** + +```sql +formatReadableDecimalSize(x) +``` + +**Example** + +Query: ```sql SELECT @@ -745,6 +751,8 @@ SELECT formatReadableDecimalSize(filesize_bytes) AS filesize ``` +Result: + ```text ┌─filesize_bytes─┬─filesize───┐ │ 1 │ 1.00 B │ @@ -754,11 +762,20 @@ SELECT └────────────────┴────────────┘ ``` -## formatReadableSize(x) +## formatReadableSize Given a size (number of bytes), this function returns a readable, rounded size with suffix (KiB, MiB, etc.) as string. -Example: +**Syntax** + +```sql +formatReadableSize(x) +``` +Alias: `FORMAT_BYTES`. + +**Example** + +Query: ```sql SELECT @@ -766,7 +783,7 @@ SELECT formatReadableSize(filesize_bytes) AS filesize ``` -Alias: `FORMAT_BYTES`. +Result: ```text ┌─filesize_bytes─┬─filesize───┐ @@ -777,11 +794,19 @@ Alias: `FORMAT_BYTES`. └────────────────┴────────────┘ ``` -## formatReadableQuantity(x) +## formatReadableQuantity Given a number, this function returns a rounded number with suffix (thousand, million, billion, etc.) as string. -Example: +**Syntax** + +```sql +formatReadableQuantity(x) +``` + +**Example** + +Query: ```sql SELECT @@ -789,6 +814,8 @@ SELECT formatReadableQuantity(number) AS number_for_humans ``` +Result: + ```text ┌─────────number─┬─number_for_humans─┐ │ 1024 │ 1.02 thousand │ @@ -903,15 +930,27 @@ SELECT parseTimeDelta('1yr2mo') └──────────────────────────┘ ``` -## least(a, b) +## least Returns the smaller value of a and b. -## greatest(a, b) +**Syntax** + +```sql +least(a, b) +``` + +## greatest Returns the larger value of a and b. -## uptime() +**Syntax** + +```sql +greatest(a, b) +``` + +## uptime Returns the server’s uptime in seconds. If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. @@ -924,9 +963,7 @@ uptime() **Returned value** -- Time value of seconds. - -Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md). +- Time value of seconds. [UInt32](../data-types/int-uint.md). **Example** @@ -944,7 +981,7 @@ Result: └────────┘ ``` -## version() +## version Returns the current version of ClickHouse as a string in the form of: @@ -971,7 +1008,7 @@ None. **Returned value** -Type: [String](../data-types/string) +- Current version of ClickHouse. [String](../data-types/string). **Implementation details** @@ -993,11 +1030,17 @@ SELECT version() └───────────┘ ``` -## buildId() +## buildId Returns the build ID generated by a compiler for the running ClickHouse server binary. If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. +**Syntax** + +```sql +buildId() +``` + ## blockNumber Returns a monotonically increasing sequence number of the [block](../../development/architecture.md#block) containing the row. @@ -1160,9 +1203,6 @@ Result: └────────────────────────┘ ``` - - - ## neighbor The window function that provides access to a row at a specified offset before or after the current row of a given column. @@ -1186,7 +1226,7 @@ To prevent that you can create a subquery with [ORDER BY](../../sql-reference/st **Arguments** - `column` — A column name or scalar expression. -- `offset` — The number of rows to look before or ahead of the current row in `column`. [Int64](../../sql-reference/data-types/int-uint.md). +- `offset` — The number of rows to look before or ahead of the current row in `column`. [Int64](../data-types/int-uint.md). - `default_value` — Optional. 
The returned value if offset is beyond the block boundaries. Type of data blocks affected. **Returned values** @@ -1194,7 +1234,9 @@ To prevent that you can create a subquery with [ORDER BY](../../sql-reference/st - Value of `column` with `offset` distance from current row, if `offset` is not outside the block boundaries. - The default value of `column` or `default_value` (if given), if `offset` is outside the block boundaries. -Type: type of data blocks affected or default value type. +:::note +The return type will be that of the data blocks affected or the default value type. +::: **Example** @@ -1281,7 +1323,7 @@ Result: └────────────┴───────┴───────────┴────────────────┘ ``` -## runningDifference(x) {#runningDifference} +## runningDifference {#runningDifference} Calculates the difference between two consecutive row values in the data block. Returns 0 for the first row, and for subsequent rows the difference to the previous row. @@ -1296,7 +1338,15 @@ The result of the function depends on the affected data blocks and the order of The order of rows during calculation of `runningDifference()` can differ from the order of rows returned to the user. To prevent that you can create a subquery with [ORDER BY](../../sql-reference/statements/select/order-by.md) and call the function from outside the subquery. -Example: +**Syntax** + +```sql +runningDifference(x) +``` + +**Example** + +Query: ```sql SELECT @@ -1315,6 +1365,8 @@ FROM ) ``` +Result: + ```text ┌─EventID─┬───────────EventTime─┬─delta─┐ │ 1106 │ 2016-11-24 00:00:04 │ 0 │ @@ -1327,6 +1379,8 @@ FROM Please note that the block size affects the result. The internal state of `runningDifference` state is reset for each new block. +Query: + ```sql SELECT number, @@ -1335,6 +1389,8 @@ FROM numbers(100000) WHERE diff != 1 ``` +Result: + ```text ┌─number─┬─diff─┐ │ 0 │ 0 │ @@ -1344,6 +1400,8 @@ WHERE diff != 1 └────────┴──────┘ ``` +Query: + ```sql set max_block_size=100000 -- default value is 65536! @@ -1354,6 +1412,8 @@ FROM numbers(100000) WHERE diff != 1 ``` +Result: + ```text ┌─number─┬─diff─┐ │ 0 │ 0 │ @@ -1386,14 +1446,12 @@ runningConcurrency(start, end) **Arguments** -- `start` — A column with the start time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `end` — A column with the end time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `start` — A column with the start time of events. [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), or [DateTime64](../data-types/datetime64.md). +- `end` — A column with the end time of events. [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), or [DateTime64](../data-types/datetime64.md). **Returned values** -- The number of concurrent events at each event start time. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md) +- The number of concurrent events at each event start time. [UInt32](../data-types/int-uint.md) **Example** @@ -1425,23 +1483,43 @@ Result: └────────────┴────────────────────────────────┘ ``` -## MACNumToString(num) +## MACNumToString Interprets a UInt64 number as a MAC address in big endian format. Returns the corresponding MAC address in format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form) as string. 
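To make the MAC helpers above and below concrete, here is a small illustrative round-trip; it is a sketch added for this review (the numeric values in the comments are worked out by hand from the hex digits, not copied from a verified run):

```sql
-- Round-trip a MAC address: string -> UInt64 -> string, plus the OUI (first three octets).
SELECT
    MACStringToNum('AA:BB:CC:DD:EE:FF') AS mac_num,                     -- expected: 187723572702975 (0xAABBCCDDEEFF)
    MACNumToString(MACStringToNum('AA:BB:CC:DD:EE:FF')) AS round_trip,  -- expected: 'AA:BB:CC:DD:EE:FF'
    MACStringToOUI('AA:BB:CC:DD:EE:FF') AS oui;                         -- expected: 11189196 (0xAABBCC)
```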
-## MACStringToNum(s)
+**Syntax**
+
+```sql
+MACNumToString(num)
+```
+
+## MACStringToNum

The inverse function of MACNumToString. If the MAC address has an invalid format, it returns 0.

-## MACStringToOUI(s)
+**Syntax**
+
+```sql
+MACStringToNum(s)
+```
+
+## MACStringToOUI

Given a MAC address in format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form), returns the first three octets as a UInt64 number. If the MAC address has an invalid format, it returns 0.

+**Syntax**
+
+```sql
+MACStringToOUI(s)
+```
+
## getSizeOfEnumType

-Returns the number of fields in [Enum](../../sql-reference/data-types/enum.md).
+Returns the number of fields in [Enum](../data-types/enum.md).

An exception is thrown if the type is not `Enum`.

+**Syntax**
+
```sql
getSizeOfEnumType(value)
```

@@ -1502,6 +1580,8 @@ Result:

Returns the internal name of the data type that represents the value.

+**Syntax**
+
```sql
toColumnTypeName(value)
```

@@ -1580,6 +1660,8 @@ Returns the default value for the given data type.

Does not include default values for custom columns set by the user.

+**Syntax**
+
```sql
defaultValueOfArgumentType(expression)
```

@@ -1592,7 +1674,7 @@ defaultValueOfArgumentType(expression)

- `0` for numbers.
- Empty string for strings.
-- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md).
+- `ᴺᵁᴸᴸ` for [Nullable](../data-types/nullable.md).

**Example**

@@ -1642,7 +1724,7 @@ defaultValueOfTypeName(type)

- `0` for numbers.
- Empty string for strings.
-- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md).
+- `ᴺᵁᴸᴸ` for [Nullable](../data-types/nullable.md).

**Example**

@@ -1688,7 +1770,7 @@ SELECT * FROM table WHERE indexHint()

**Returned value**

-Type: [Uint8](https://clickhouse.com/docs/en/data_types/int_uint/#diapazony-uint).
+- `1`. [UInt8](../data-types/int-uint.md).

**Example**

@@ -1778,29 +1860,31 @@ Result:

Creates an array with a single value.

-Used for the internal implementation of [arrayJoin](../../sql-reference/functions/array-join.md#functions_arrayjoin).
+:::note
+This function is used for the internal implementation of [arrayJoin](../../sql-reference/functions/array-join.md#functions_arrayjoin).
+:::
+
+**Syntax**

```sql
-SELECT replicate(x, arr);
+replicate(x, arr)
```

-**Arguments:**
+**Arguments**

-- `arr` — An array.
- `x` — The value to fill the result array with.
+- `arr` — An array. [Array](../data-types/array.md).

**Returned value**

-An array of the lame length as `arr` filled with value `x`.
-
-Type: `Array`.
+An array of the same length as `arr` filled with value `x`. [Array](../data-types/array.md).

**Example**

Query:

```sql
-SELECT replicate(1, ['a', 'b', 'c'])
+SELECT replicate(1, ['a', 'b', 'c']);
```

Result:

@@ -1811,6 +1895,36 @@ Result:
└───────────────────────────────┘
```

+## revision
+
+Returns the current ClickHouse [server revision](../../operations/system-tables/metrics#revision).
+
+**Syntax**
+
+```sql
+revision()
+```
+
+**Returned value**
+
+- The current ClickHouse server revision. [UInt32](../data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT revision();
+```
+
+Result:
+
+```response
+┌─revision()─┐
+│      54485 │
+└────────────┘
+```
+
## filesystemAvailable

Returns the amount of free space in the filesystem hosting the database persistence. The returned value is always smaller than total free space ([filesystemFree](#filesystemfree)) because some space is reserved for the operating system.

**Syntax**

```sql
filesystemAvailable()
```

**Returned value**

-- The amount of remaining space available in bytes.
- -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The amount of remaining space available in bytes. [UInt64](../data-types/int-uint.md). **Example** @@ -1855,9 +1967,7 @@ filesystemFree() **Returned value** -- The amount of free space in bytes. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The amount of free space in bytes. [UInt64](../data-types/int-uint.md). **Example** @@ -1887,9 +1997,7 @@ filesystemCapacity() **Returned value** -- Capacity of the filesystem in bytes. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- Capacity of the filesystem in bytes. [UInt64](../data-types/int-uint.md). **Example** @@ -1909,7 +2017,7 @@ Result: ## initializeAggregation -Calculates the result of an aggregate function based on a single value. This function can be used to initialize aggregate functions with combinator [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state). You can create states of aggregate functions and insert them to columns of type [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction) or use initialized aggregates as default values. +Calculates the result of an aggregate function based on a single value. This function can be used to initialize aggregate functions with combinator [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state). You can create states of aggregate functions and insert them to columns of type [AggregateFunction](../data-types/aggregatefunction.md#data-type-aggregatefunction) or use initialized aggregates as default values. **Syntax** @@ -1919,7 +2027,7 @@ initializeAggregation (aggregate_function, arg1, arg2, ..., argN) **Arguments** -- `aggregate_function` — Name of the aggregation function to initialize. [String](../../sql-reference/data-types/string.md). +- `aggregate_function` — Name of the aggregation function to initialize. [String](../data-types/string.md). - `arg` — Arguments of aggregate function. **Returned value(s)** @@ -1994,13 +2102,15 @@ finalizeAggregation(state) **Arguments** -- `state` — State of aggregation. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). +- `state` — State of aggregation. [AggregateFunction](../data-types/aggregatefunction.md#data-type-aggregatefunction). **Returned value(s)** - Value/values that was aggregated. -Type: Value of any types that was aggregated. +:::note +The return type is equal to that of any types which were aggregated. +::: **Examples** @@ -2100,8 +2210,8 @@ runningAccumulate(agg_state[, grouping]); **Arguments** -- `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). -- `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined. +- `agg_state` — State of the aggregate function. [AggregateFunction](../data-types/aggregatefunction.md#data-type-aggregatefunction). +- `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../data-types/index.md) for which the equality operator is defined. 
**Returned value** @@ -2253,7 +2363,7 @@ Result: └──────────────────────────────────────────────────┘ ``` -## catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n) +## catboostEvaluate :::note This function is not available in ClickHouse Cloud. @@ -2262,6 +2372,14 @@ This function is not available in ClickHouse Cloud. Evaluate an external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learning. Accepts a path to a catboost model and model arguments (features). Returns Float64. +**Syntax** + +```sql +catboostEvaluate(path_to_model, feature_1, feature_2, ..., feature_n) +``` + +**Example** + ```sql SELECT feat1, ..., feat_n, catboostEvaluate('/path/to/model.bin', feat_1, ..., feat_n) AS prediction FROM data_table @@ -2298,10 +2416,16 @@ communicate using a HTTP interface. By default, port `9012` is used. A different See [Training and applying models](https://catboost.ai/docs/features/training.html#training) for how to train catboost models from a training data set. -## throwIf(x\[, message\[, error_code\]\]) +## throwIf Throw an exception if argument `x` is true. +**Syntax** + +```sql +throwIf(x[, message[, error_code]]) +``` + **Arguments** - `x` - the condition to check. @@ -2361,7 +2485,7 @@ getSetting('custom_setting'); **Parameter** -- `custom_setting` — The setting name. [String](../../sql-reference/data-types/string.md). +- `custom_setting` — The setting name. [String](../data-types/string.md). **Returned value** @@ -2386,7 +2510,7 @@ Result: ## isDecimalOverflow -Checks whether the [Decimal](../../sql-reference/data-types/decimal.md) value is outside its precision or outside the specified precision. +Checks whether the [Decimal](../data-types/decimal.md) value is outside its precision or outside the specified precision. **Syntax** @@ -2396,8 +2520,8 @@ isDecimalOverflow(d, [p]) **Arguments** -- `d` — value. [Decimal](../../sql-reference/data-types/decimal.md). -- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. This parameter can be helpful to migrate data from/to another database or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). +- `d` — value. [Decimal](../data-types/decimal.md). +- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. This parameter can be helpful to migrate data from/to another database or file. [UInt8](../data-types/int-uint.md#uint-ranges). **Returned values** @@ -2433,13 +2557,11 @@ countDigits(x) **Arguments** -- `x` — [Int](../../sql-reference/data-types/int-uint.md) or [Decimal](../../sql-reference/data-types/decimal.md) value. +- `x` — [Int](../data-types/int-uint.md) or [Decimal](../data-types/decimal.md) value. **Returned value** -Number of digits. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). +- Number of digits. [UInt8](../data-types/int-uint.md#uint-ranges). :::note For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow). @@ -2463,9 +2585,7 @@ Result: ## errorCodeToName -Returns the textual name of an error code. - -Type: [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md). 
+- The textual name of an error code. [LowCardinality(String)](../data-types/lowcardinality.md). **Syntax** @@ -2496,9 +2616,7 @@ tcpPort() **Returned value** -- The TCP port number. - -Type: [UInt16](../../sql-reference/data-types/int-uint.md). +- The TCP port number. [UInt16](../data-types/int-uint.md). **Example** @@ -2534,9 +2652,7 @@ currentProfiles() **Returned value** -- List of the current user settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the current user settings profiles. [Array](../data-types/array.md)([String](../data-types/string.md)). ## enabledProfiles @@ -2550,9 +2666,7 @@ enabledProfiles() **Returned value** -- List of the enabled settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled settings profiles. [Array](../data-types/array.md)([String](../data-types/string.md)). ## defaultProfiles @@ -2566,9 +2680,7 @@ defaultProfiles() **Returned value** -- List of the default settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default settings profiles. [Array](../data-types/array.md)([String](../data-types/string.md)). ## currentRoles @@ -2582,9 +2694,7 @@ currentRoles() **Returned value** -- A list of the current roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- A list of the current roles for the current user. [Array](../data-types/array.md)([String](../data-types/string.md)). ## enabledRoles @@ -2598,9 +2708,7 @@ enabledRoles() **Returned value** -- List of the enabled roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled roles for the current user. [Array](../data-types/array.md)([String](../data-types/string.md)). ## defaultRoles @@ -2614,9 +2722,7 @@ defaultRoles() **Returned value** -- List of the default roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default roles for the current user. [Array](../data-types/array.md)([String](../data-types/string.md)). ## getServerPort @@ -2630,7 +2736,7 @@ getServerPort(port_name) **Arguments** -- `port_name` — The name of the server port. [String](../../sql-reference/data-types/string.md#string). Possible values: +- `port_name` — The name of the server port. [String](../data-types/string.md#string). Possible values: - 'tcp_port' - 'tcp_port_secure' @@ -2645,9 +2751,7 @@ getServerPort(port_name) **Returned value** -- The number of the server port. - -Type: [UInt16](../../sql-reference/data-types/int-uint.md). +- The number of the server port. [UInt16](../data-types/int-uint.md). **Example** @@ -2679,9 +2783,7 @@ queryID() **Returned value** -- The ID of the current query. - -Type: [String](../../sql-reference/data-types/string.md) +- The ID of the current query. [String](../data-types/string.md) **Example** @@ -2715,9 +2817,7 @@ initialQueryID() **Returned value** -- The ID of the initial current query. - -Type: [String](../../sql-reference/data-types/string.md) +- The ID of the initial current query. [String](../data-types/string.md) **Example** @@ -2750,9 +2850,7 @@ shardNum() **Returned value** -- Shard index or constant `0`. 
- -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Shard index or constant `0`. [UInt32](../data-types/int-uint.md). **Example** @@ -2792,9 +2890,7 @@ shardCount() **Returned value** -- Total number of shards or `0`. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Total number of shards or `0`. [UInt32](../data-types/int-uint.md). **See Also** @@ -2816,9 +2912,7 @@ getOSKernelVersion() **Returned value** -- The current OS kernel version. - -Type: [String](../../sql-reference/data-types/string.md). +- The current OS kernel version. [String](../data-types/string.md). **Example** @@ -2852,9 +2946,7 @@ zookeeperSessionUptime() **Returned value** -- Uptime of the current ZooKeeper session in seconds. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Uptime of the current ZooKeeper session in seconds. [UInt32](../data-types/int-uint.md). **Example** @@ -2891,9 +2983,7 @@ All arguments must be constant. **Returned value** -- Randomly generated table structure. - -Type: [String](../../sql-reference/data-types/string.md). +- Randomly generated table structure. [String](../data-types/string.md). **Examples** @@ -2960,9 +3050,7 @@ structureToCapnProtoSchema(structure) **Returned value** -- CapnProto schema - -Type: [String](../../sql-reference/data-types/string.md). +- CapnProto schema. [String](../data-types/string.md). **Examples** @@ -3061,9 +3149,7 @@ structureToProtobufSchema(structure) **Returned value** -- Protobuf schema - -Type: [String](../../sql-reference/data-types/string.md). +- Protobuf schema. [String](../data-types/string.md). **Examples** @@ -3143,11 +3229,11 @@ formatQueryOrNull(query) **Arguments** -- `query` - The SQL query to be formatted. [String](../../sql-reference/data-types/string.md) +- `query` - The SQL query to be formatted. [String](../data-types/string.md) **Returned value** -- The formatted query. [String](../../sql-reference/data-types/string.md). +- The formatted query. [String](../data-types/string.md). **Example** @@ -3182,11 +3268,11 @@ formatQuerySingleLineOrNull(query) **Arguments** -- `query` - The SQL query to be formatted. [String](../../sql-reference/data-types/string.md) +- `query` - The SQL query to be formatted. [String](../data-types/string.md) **Returned value** -- The formatted query. [String](../../sql-reference/data-types/string.md). +- The formatted query. [String](../data-types/string.md). **Example** @@ -3214,8 +3300,8 @@ variantElement(variant, type_name, [, default_value]) **Arguments** -- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md). -- `type_name` — The name of the variant type to extract. [String](../../sql-reference/data-types/string.md). +- `variant` — Variant column. [Variant](../data-types/variant.md). +- `type_name` — The name of the variant type to extract. [String](../data-types/string.md). - `default_value` - The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional. **Returned value** @@ -3251,7 +3337,7 @@ variantType(variant) **Arguments** -- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md). +- `variant` — Variant column. [Variant](../data-types/variant.md). **Returned value** @@ -3467,7 +3553,7 @@ showCertificate() **Returned value** -- Map of key-value pairs relating to the configured SSL certificate. [Map](../../sql-reference/data-types/map.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). 
+- Map of key-value pairs relating to the configured SSL certificate. [Map](../data-types/map.md)([String](../data-types/string.md), [String](../data-types/string.md)). **Example** diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 2d7752ed022..a9b483aa0e5 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -169,7 +169,7 @@ randUniform(min, max) ### Returned value -A random number of type [Float64](/docs/en/sql-reference/data-types/float.md). +A random number of type [Float64](../data-types/float.md). ### Example @@ -204,9 +204,7 @@ randNormal(mean, variance) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -243,9 +241,7 @@ randLogNormal(mean, variance) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -282,9 +278,7 @@ randBinomial(experiments, probability) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -321,9 +315,7 @@ randNegativeBinomial(experiments, probability) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -359,9 +351,7 @@ randPoisson(n) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -397,9 +387,7 @@ randBernoulli(probability) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -435,9 +423,7 @@ randExponential(lambda) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -473,9 +459,7 @@ randChiSquared(degree_of_freedom) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -511,9 +495,7 @@ randStudentT(degree_of_freedom) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -550,9 +532,7 @@ randFisherF(d1, d2) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -588,9 +568,7 @@ randomString(length) **Returned value** -- String filled with random bytes. - -Type: [String](../../sql-reference/data-types/string.md). +- String filled with random bytes. [String](../data-types/string.md). **Example** @@ -626,13 +604,11 @@ randomFixedString(length); **Arguments** -- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). +- `length` — String length in bytes. [UInt64](../data-types/int-uint.md). **Returned value(s)** -- String filled with random bytes. - -Type: [FixedString](../../sql-reference/data-types/fixedstring.md). +- String filled with random bytes. [FixedString](../data-types/fixedstring.md). 
**Example** @@ -667,9 +643,7 @@ randomPrintableASCII(length) **Returned value** -- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. - -Type: [String](../../sql-reference/data-types/string.md) +- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. [String](../data-types/string.md) **Example** @@ -697,13 +671,11 @@ randomStringUTF8(length); **Arguments** -- `length` — Length of the string in code points. [UInt64](../../sql-reference/data-types/int-uint.md). +- `length` — Length of the string in code points. [UInt64](../data-types/int-uint.md). **Returned value(s)** -- UTF-8 random string. - -Type: [String](../../sql-reference/data-types/string.md). +- UTF-8 random string. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index afec43cd6f4..d18185c5013 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -36,8 +36,8 @@ Alias: `truncate`. **Parameters** -- `input`: A numeric type ([Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md) or [Integer](/docs/en/sql-reference/data-types/int-uint.md)). -- `precision`: An [Integer](/docs/en/sql-reference/data-types/int-uint.md) type. +- `input`: A numeric type ([Float](../data-types/float.md), [Decimal](../data-types/decimal.md) or [Integer](../data-types/int-uint.md)). +- `precision`: An [Integer](../data-types/int-uint.md) type. **Returned value** @@ -69,7 +69,7 @@ round(expression [, decimal_places]) **Arguments** -- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). +- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../data-types/index.md#data_types). - `decimal-places` — An integer value. - If `decimal-places > 0` then the function rounds the value to the right of the decimal point. - If `decimal-places < 0` then the function rounds the value to the left of the decimal point. @@ -171,7 +171,7 @@ roundBankers(expression [, decimal_places]) **Arguments** -- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). +- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../data-types/index.md#data_types). - `decimal-places` — Decimal places. An integer number. - `decimal-places > 0` — The function rounds the number to the given position right of the decimal point. Example: `roundBankers(3.55, 1) = 3.6`. - `decimal-places < 0` — The function rounds the number to the given position left of the decimal point. Example: `roundBankers(24.55, -1) = 20`. 
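To make the tie-breaking behaviour of `roundBankers` concrete, a short illustrative query follows; the expected values are inferred from the rules and examples stated above rather than quoted from a verified run:

```sql
-- Banker's rounding: values exactly halfway between two candidates round to the nearest even digit.
SELECT
    roundBankers(2.5)       AS half_down,   -- expected: 2 (ties go to the even neighbour)
    roundBankers(3.5)       AS half_up,     -- expected: 4
    roundBankers(3.55, 1)   AS one_decimal, -- expected: 3.6 (example given in the rule above)
    roundBankers(24.55, -1) AS tens;        -- expected: 20 (example given in the rule above)
```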
diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 8e50637cf30..20d63d84628 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -19,20 +19,20 @@ splitByChar(separator, s[, max_substrings])) **Arguments** -- `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `separator` — The separator which should contain exactly one character. [String](../data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. If `max_substrings` > 0, the returned array will contain at most `max_substrings` substrings, otherwise the function will return as many substrings as possible. **Returned value(s)** -Returns an array of selected substrings. Empty substrings may be selected when: +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). + + Empty substrings may be selected when: - A separator occurs at the beginning or end of the string; - There are multiple consecutive separators; - The original string `s` is empty. -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). - :::note The behavior of parameter `max_substrings` changed starting with ClickHouse v22.11. In versions older than that, `max_substrings > 0` meant that `max_substring`-many splits were performed and that the remainder of the string was returned as the final element of the list. For example, @@ -70,21 +70,23 @@ splitByString(separator, s[, max_substrings])) **Arguments** -- `separator` — The separator. [String](../../sql-reference/data-types/string.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `separator` — The separator. [String](../data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -Returns an array of selected substrings. Empty substrings may be selected when: +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +Empty substrings may be selected when: - A non-empty separator occurs at the beginning or end of the string; - There are multiple consecutive non-empty separators; - The original string `s` is empty while the separator is not empty. +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -125,21 +127,24 @@ splitByRegexp(regexp, s[, max_substrings])) **Arguments** - `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). 
+- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -Returns an array of selected substrings. Empty substrings may be selected when: +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). + + +Empty substrings may be selected when: - A non-empty regular expression match occurs at the beginning or end of the string; - There are multiple consecutive non-empty regular expression matches; - The original string `s` is empty while the regular expression is not empty. -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). - +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -180,17 +185,17 @@ splitByWhitespace(s[, max_substrings])) **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -Returns an array of selected substrings. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). - +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). + +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -219,17 +224,17 @@ splitByNonAlpha(s[, max_substrings])) **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -Returns an array of selected substrings. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -282,16 +287,16 @@ Alias: `splitByAlpha` **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. 
When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible.

**Returned value(s)**

-Returns an array of selected substrings.
-
-Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
+- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)).

+:::note
Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
+:::

**Example**

@@ -322,11 +327,7 @@ extractAllGroups(text, regexp)

**Returned values**

-- If the function finds at least one matching group, it returns `Array(Array(String))` column, clustered by group_id (1 to N, where N is number of capturing groups in `regexp`).
-
-- If there is no matching group, returns an empty array.
-
-Type: [Array](../data-types/array.md).
+- If the function finds at least one matching group, it returns `Array(Array(String))` column, clustered by group_id (1 to N, where N is number of capturing groups in `regexp`). If there is no matching group, it returns an empty array. [Array](../data-types/array.md).

**Example**

@@ -354,14 +355,12 @@ ngrams(string, ngramsize)

**Arguments**

-- `string` — String. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
-- `ngramsize` — The size of an n-gram. [UInt](../../sql-reference/data-types/int-uint.md).
+- `string` — String. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+- `ngramsize` — The size of an n-gram. [UInt](../data-types/int-uint.md).

**Returned values**

-- Array with n-grams.
-
-Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
+- Array with n-grams. [Array](../data-types/array.md)([String](../data-types/string.md)).

**Example**

@@ -383,13 +382,11 @@ Splits a string into tokens using non-alphanumeric ASCII characters as separator

**Arguments**

-- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object.
+- `input_string` — Any set of bytes represented as the [String](../data-types/string.md) data type object.

**Returned value**

-- The resulting array of tokens from input string.
-
-Type: [Array](../data-types/array.md).
+- The resulting array of tokens from input string. [Array](../data-types/array.md).

**Example**

diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index ba23870a584..342ca2b9f03 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -30,9 +30,7 @@ empty(x)

**Returned value**

-- Returns `1` for an empty string or `0` for a non-empty string.
-
-Type: [UInt8](../data-types/int-uint.md).
+- Returns `1` for an empty string or `0` for a non-empty string. [UInt8](../data-types/int-uint.md).

**Example**

@@ -68,9 +66,7 @@ notEmpty(x)

**Returned value**

-- Returns `1` for a non-empty string or `0` for an empty string string.
-
-Type: [UInt8](../data-types/int-uint.md).
+- Returns `1` for a non-empty string or `0` for an empty string. [UInt8](../data-types/int-uint.md).
**Example** @@ -187,7 +183,7 @@ left(s, offset) **Parameters** -- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -234,7 +230,7 @@ leftUTF8(s, offset) **Parameters** -- `s`: The UTF-8 encoded string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -289,9 +285,7 @@ Alias: `LPAD` **Returned value** -- A left-padded string of the given length. - -Type: [String](../data-types/string.md). +- A left-padded string of the given length. [String](../data-types/string.md). **Example** @@ -325,9 +319,7 @@ leftPadUTF8(string, length[, pad_string]) **Returned value** -- A left-padded string of the given length. - -Type: [String](../data-types/string.md). +- A left-padded string of the given length. [String](../data-types/string.md). **Example** @@ -355,7 +347,7 @@ right(s, offset) **Parameters** -- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -402,7 +394,7 @@ rightUTF8(s, offset) **Parameters** -- `s`: The UTF-8 encoded string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -457,9 +449,7 @@ Alias: `RPAD` **Returned value** -- A left-padded string of the given length. - -Type: [String](../data-types/string.md). +- A left-padded string of the given length. [String](../data-types/string.md). **Example** @@ -493,9 +483,7 @@ rightPadUTF8(string, length[, pad_string]) **Returned value** -- A right-padded string of the given length. - -Type: [String](../data-types/string.md). +- A right-padded string of the given length. [String](../data-types/string.md). **Example** @@ -525,11 +513,11 @@ Alias: `lcase` **Parameters** -- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `input`: A string type [String](../data-types/string.md). **Returned value** -- A [String](/docs/en/sql-reference/data-types/string.md) data type value. +- A [String](../data-types/string.md) data type value. **Example** @@ -559,11 +547,11 @@ Alias: `ucase` **Parameters** -- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `input`: A string type [String](../data-types/string.md). **Returned value** -- A [String](/docs/en/sql-reference/data-types/string.md) data type value. 
+- A [String](../data-types/string.md) data type value. **Examples** @@ -603,11 +591,11 @@ upperUTF8(input) **Parameters** -- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `input`: A string type [String](../data-types/string.md). **Returned value** -- A [String](/docs/en/sql-reference/data-types/string.md) data type value. +- A [String](../data-types/string.md) data type value. **Example** @@ -639,7 +627,7 @@ toValidUTF8(input_string) **Arguments** -- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. +- `input_string` — Any set of bytes represented as the [String](../data-types/string.md) data type object. **Returned value** @@ -671,14 +659,12 @@ Alias: `REPEAT` **Arguments** -- `s` — The string to repeat. [String](../../sql-reference/data-types/string.md). -- `n` — The number of times to repeat the string. [UInt* or Int*](../../sql-reference/data-types/int-uint.md). +- `s` — The string to repeat. [String](../data-types/string.md). +- `n` — The number of times to repeat the string. [UInt* or Int*](../data-types/int-uint.md). **Returned value** -A string containing string `s` repeated `n` times. If `n` <= 0, the function returns the empty string. - -Type: `String`. +A string containing string `s` repeated `n` times. If `n` <= 0, the function returns the empty string. [String](../data-types/string.md). **Example** @@ -708,13 +694,11 @@ Alias: `SPACE`. **Arguments** -- `n` — The number of times to repeat the space. [UInt* or Int*](../../sql-reference/data-types/int-uint.md). +- `n` — The number of times to repeat the space. [UInt* or Int*](../data-types/int-uint.md). **Returned value** -The string containing string ` ` repeated `n` times. If `n` <= 0, the function returns the empty string. - -Type: `String`. +The string containing string ` ` repeated `n` times. If `n` <= 0, the function returns the empty string. [String](../data-types/string.md). **Example** @@ -754,7 +738,7 @@ concat(s1, s2, ...) At least one value of arbitrary type. -Arguments which are not of types [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. +Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. **Returned values** @@ -861,8 +845,8 @@ Alias: `concat_ws` **Arguments** -- sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- exprN — expression to be concatenated. Arguments which are not of types [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. +- sep — separator. Const [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- exprN — expression to be concatenated. Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. 
As this decreases performance, it is not recommended to use non-String/FixedString arguments. **Returned values** @@ -907,15 +891,13 @@ Alias: **Arguments** -- `s` — The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md) -- `offset` — The starting position of the substring in `s` . [(U)Int*](../../sql-reference/data-types/int-uint.md). -- `length` — The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional. +- `s` — The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md) +- `offset` — The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md). +- `length` — The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional. **Returned value** -A substring of `s` with `length` many bytes, starting at index `offset`. - -Type: `String`. +A substring of `s` with `length` many bytes, starting at index `offset`. [String](../data-types/string.md). **Example** @@ -945,9 +927,9 @@ substringUTF8(s, offset[, length]) **Arguments** -- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md) -- `offset`: The starting position of the substring in `s` . [(U)Int*](../../sql-reference/data-types/int-uint.md). -- `length`: The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional. +- `s`: The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md) +- `offset`: The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md). +- `length`: The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional. **Returned value** @@ -983,8 +965,8 @@ Alias: `SUBSTRING_INDEX` **Arguments** -- s: The string to extract substring from. [String](../../sql-reference/data-types/string.md). -- delim: The character to split. [String](../../sql-reference/data-types/string.md). +- s: The string to extract substring from. [String](../data-types/string.md). +- delim: The character to split. [String](../data-types/string.md). - count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) **Example** @@ -1014,13 +996,13 @@ substringIndexUTF8(s, delim, count) **Arguments** -- `s`: The string to extract substring from. [String](../../sql-reference/data-types/string.md). -- `delim`: The character to split. [String](../../sql-reference/data-types/string.md). +- `s`: The string to extract substring from. [String](../data-types/string.md). +- `delim`: The character to split. [String](../data-types/string.md). - `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. 
If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) **Returned value** -A substring [String](../../sql-reference/data-types/string.md) of `s` before `count` occurrences of `delim`. +A substring [String](../data-types/string.md) of `s` before `count` occurrences of `delim`. **Implementation details** @@ -1068,13 +1050,11 @@ base58Encode(plaintext) **Arguments** -- `plaintext` — [String](../../sql-reference/data-types/string.md) column or constant. +- `plaintext` — [String](../data-types/string.md) column or constant. **Returned value** -- A string containing the encoded value of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string containing the encoded value of the argument. [String](../data-types/string.md). **Example** @@ -1102,13 +1082,11 @@ base58Decode(encoded) **Arguments** -- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, an exception is thrown. +- `encoded` — [String](../data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, an exception is thrown. **Returned value** -- A string containing the decoded value of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string containing the decoded value of the argument. [String](../data-types/string.md). **Example** @@ -1136,7 +1114,7 @@ tryBase58Decode(encoded) **Parameters** -- `encoded`: [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error. +- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error. **Returned value** @@ -1180,7 +1158,7 @@ tryBase64Decode(encoded) **Parameters** -- `encoded`: [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error. +- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error. **Examples** @@ -1279,14 +1257,12 @@ trim([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) **Arguments** -- `trim_character` — Specified characters for trim. [String](../../sql-reference/data-types/string.md). -- `input_string` — String for trim. [String](../../sql-reference/data-types/string.md). +- `trim_character` — Specified characters for trim. [String](../data-types/string.md). +- `input_string` — String for trim. [String](../data-types/string.md). **Returned value** -A string without leading and/or trailing specified characters. - -Type: `String`. +A string without leading and/or trailing specified characters. [String](../data-types/string.md). **Example** @@ -1316,13 +1292,11 @@ Alias: `ltrim(input_string)`. **Arguments** -- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). +- `input_string` — string to trim. [String](../data-types/string.md). **Returned value** -A string without leading common whitespaces. - -Type: `String`. +A string without leading common whitespaces. [String](../data-types/string.md). **Example** @@ -1352,13 +1326,11 @@ Alias: `rtrim(input_string)`. **Arguments** -- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). 
+- `input_string` — string to trim. [String](../data-types/string.md). **Returned value** -A string without trailing common whitespaces. - -Type: `String`. +A string without trailing common whitespaces. [String](../data-types/string.md). **Example** @@ -1388,13 +1360,11 @@ Alias: `trim(input_string)`. **Arguments** -- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). +- `input_string` — string to trim. [String](../data-types/string.md). **Returned value** -A string without leading and trailing common whitespaces. - -Type: `String`. +A string without leading and trailing common whitespaces. [String](../data-types/string.md). **Example** @@ -1440,13 +1410,11 @@ normalizeQuery(x) **Arguments** -- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). +- `x` — Sequence of characters. [String](../data-types/string.md). **Returned value** -- Sequence of characters with placeholders. - -Type: [String](../../sql-reference/data-types/string.md). +- Sequence of characters with placeholders. [String](../data-types/string.md). **Example** @@ -1474,13 +1442,11 @@ normalizedQueryHash(x) **Arguments** -- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). +- `x` — Sequence of characters. [String](../data-types/string.md). **Returned value** -- Hash value. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges). +- Hash value. [UInt64](../data-types/int-uint.md#uint-ranges). **Example** @@ -1508,13 +1474,11 @@ normalizeUTF8NFC(words) **Arguments** -- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../data-types/string.md). **Returned value** -- String transformed to NFC normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFC normalization form. [String](../data-types/string.md). **Example** @@ -1542,13 +1506,11 @@ normalizeUTF8NFD(words) **Arguments** -- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../data-types/string.md). **Returned value** -- String transformed to NFD normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFD normalization form. [String](../data-types/string.md). **Example** @@ -1576,13 +1538,11 @@ normalizeUTF8NFKC(words) **Arguments** -- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../data-types/string.md). **Returned value** -- String transformed to NFKC normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFKC normalization form. [String](../data-types/string.md). **Example** @@ -1610,13 +1570,11 @@ normalizeUTF8NFKD(words) **Arguments** -- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../data-types/string.md). **Returned value** -- String transformed to NFKD normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFKD normalization form. [String](../data-types/string.md). **Example** @@ -1647,13 +1605,11 @@ encodeXMLComponent(x) **Arguments** -- `x` — An input string. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../data-types/string.md). 
**Returned value** -- The escaped string. - -Type: [String](../../sql-reference/data-types/string.md). +- The escaped string. [String](../data-types/string.md). **Example** @@ -1687,13 +1643,11 @@ decodeXMLComponent(x) **Arguments** -- `x` — An input string. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../data-types/string.md). **Returned value** -- The un-escaped string. - -Type: [String](../../sql-reference/data-types/string.md). +- The un-escaped string. [String](../data-types/string.md). **Example** @@ -1723,13 +1677,11 @@ decodeHTMLComponent(x) **Arguments** -- `x` — An input string. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../data-types/string.md). **Returned value** -- The un-escaped string. - -Type: [String](../../sql-reference/data-types/string.md). +- The un-escaped string. [String](../data-types/string.md). **Example** @@ -1778,13 +1730,11 @@ extractTextFromHTML(x) **Arguments** -- `x` — input text. [String](../../sql-reference/data-types/string.md). +- `x` — input text. [String](../data-types/string.md). **Returned value** -- Extracted text. - -Type: [String](../../sql-reference/data-types/string.md). +- Extracted text. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 0b761b62006..7aeb1f5b2a7 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -139,7 +139,7 @@ Format the `pattern` string with the values (strings, integers, etc.) listed in **Syntax** ```sql -format(pattern, s0, s1, …) +format(pattern, s0, s1, ...) ``` **Example** @@ -202,13 +202,13 @@ translateUTF8(s, from, to) **Parameters** -- `s`: A string type [String](/docs/en/sql-reference/data-types/string.md). -- `from`: A string type [String](/docs/en/sql-reference/data-types/string.md). -- `to`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `s`: A string type [String](../data-types/string.md). +- `from`: A string type [String](../data-types/string.md). +- `to`: A string type [String](../data-types/string.md). **Returned value** -- A [String](/docs/en/sql-reference/data-types/string.md) data type value. +- A [String](../data-types/string.md) data type value. **Examples** diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 9738c19bf3c..d261cff3580 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -17,7 +17,7 @@ Functions in this section also assume that the searched string (referred to in t violated, no exception is thrown and results are undefined. Search with UTF-8 encoded strings is usually provided by separate function variants. Likewise, if a UTF-8 function variant is used and the input strings are not UTF-8 encoded text, no exception is thrown and the results are undefined. Note that no automatic Unicode normalization is performed, however you can use the -[normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. +[normalizeUTF8*()](https://clickhouse.com../functions/string-functions/) functions for that. [General strings functions](string-functions.md) and [functions for replacing in strings](string-replace-functions.md) are described separately. 
@@ -38,12 +38,12 @@ Alias: - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. -**Returned values** +**Returned value** -- Starting position in bytes and counting from 1, if the substring was found. -- 0, if the substring was not found. +- Starting position in bytes and counting from 1, if the substring was found. [UInt64](../data-types/int-uint.md). +- 0, if the substring was not found. [UInt64](../data-types/int-uint.md). If substring `needle` is empty, these rules apply: - if no `start_pos` was specified: return `1` @@ -53,8 +53,6 @@ If substring `needle` is empty, these rules apply: The same rules also apply to functions `locate`, `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8`. -Type: `Integer`. - **Examples** Query: @@ -206,9 +204,9 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN]) **Arguments** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). -**Returned values** +**Returned value** - Array of the starting position in bytes and counting from 1, if the substring was found. - 0, if the substring was not found. @@ -241,7 +239,7 @@ multiSearchAllPositionsCaseInsensitive(haystack, [needle1, needle2, ..., needleN **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -275,7 +273,7 @@ multiSearchAllPositionsUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — UTF-8 encoded substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -311,7 +309,7 @@ multiSearchAllPositionsCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., nee **Parameters** - `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — UTF-8 encoded substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -349,7 +347,7 @@ multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). 
**Returned value** @@ -383,7 +381,7 @@ multiSearchFirstPositionCaseInsensitive(haystack, [needle1, needle2, ..., needle **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Array of substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -417,7 +415,7 @@ multiSearchFirstPositionUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -453,7 +451,7 @@ multiSearchFirstPositionCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., ne **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md) **Returned value** @@ -490,12 +488,11 @@ multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** -- index (starting from 1) of the leftmost found needle. -- 0, if there was no match. +- index (starting from 1) of the leftmost found needle. Otherwise 0, if there was no match. [UInt8](../data-types/int-uint.md). **Example** @@ -524,12 +521,11 @@ multiSearchFirstIndexCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** -- index (starting from 1) of the leftmost found needle. -- 0, if there was no match. +- index (starting from 1) of the leftmost found needle. Otherwise 0, if there was no match. [UInt8](../data-types/int-uint.md). **Example** @@ -558,12 +554,11 @@ multiSearchFirstIndexUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md) **Returned value** -- index (starting from 1) of the leftmost found needle. -- 0, if there was no match. +- index (starting from 1) of the leftmost found needle, Otherwise 0, if there was no match. [UInt8](../data-types/int-uint.md). **Example** @@ -594,12 +589,11 @@ multiSearchFirstIndexCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needl **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). 
-- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md). **Returned value** -- index (starting from 1) of the leftmost found needle. -- 0, if there was no match. +- index (starting from 1) of the leftmost found needle. Otherwise 0, if there was no match. [UInt8](../data-types/int-uint.md). **Example** @@ -632,7 +626,7 @@ multiSearchAny(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -666,7 +660,7 @@ multiSearchAnyCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Substrings to be searched. [Array](../data-types/array.md) **Returned value** @@ -700,7 +694,7 @@ multiSearchAnyUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — UTF-8 substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -736,7 +730,7 @@ multiSearchAnyCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — UTF-8 substrings to be searched. [Array](../data-types/array.md) **Returned value** @@ -799,7 +793,7 @@ If you only want to search multiple substrings in a string, you can use function **Syntax** ```sql -multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAny(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAnyIndex @@ -809,7 +803,7 @@ Like `multiMatchAny` but returns any index that matches the haystack. 
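For illustration (the haystack and patterns below are arbitrary): with only the second pattern matching, the returned index is 2.

```sql
SELECT multiMatchAnyIndex('ClickHouse', ['^Postgres', 'House']) AS idx -- 2: only the second pattern matches
```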
**Syntax** ```sql -multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAnyIndex(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAllIndices @@ -819,7 +813,7 @@ Like `multiMatchAny` but returns the array of all indices that match the haystac **Syntax** ```sql -multiMatchAllIndices(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAllIndices(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAny @@ -833,7 +827,7 @@ Like `multiMatchAny` but returns 1 if any pattern matches the haystack within a **Syntax** ```sql -multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAnyIndex @@ -843,7 +837,7 @@ Like `multiFuzzyMatchAny` but returns any index that matches the haystack within **Syntax** ```sql -multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAllIndices @@ -853,7 +847,7 @@ Like `multiFuzzyMatchAny` but returns the array of all indices in any order that **Syntax** ```sql -multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## extract @@ -896,14 +890,16 @@ extractAllGroupsHorizontal(haystack, pattern) **Arguments** -- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). -- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). +- `haystack` — Input string. [String](../data-types/string.md). +- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../data-types/string.md). **Returned value** -- Type: [Array](../../sql-reference/data-types/array.md). +- Array of arrays of matches. [Array](../data-types/array.md). +:::note If `haystack` does not match the `pattern` regex, an array of empty arrays is returned. +::: **Example** @@ -931,14 +927,16 @@ extractAllGroupsVertical(haystack, pattern) **Arguments** -- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). -- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). +- `haystack` — Input string. [String](../data-types/string.md). +- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../data-types/string.md). **Returned value** -- Type: [Array](../../sql-reference/data-types/array.md). +- Array of arrays of matches. [Array](../data-types/array.md). +:::note If `haystack` does not match the `pattern` regex, an empty array is returned. +::: **Example** @@ -968,7 +966,7 @@ Matching is based on UTF-8, e.g. 
`_` matches the Unicode code point `¥` which i If the haystack or the LIKE expression are not valid UTF-8, the behavior is undefined. -No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. +No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com../functions/string-functions/) functions for that. To match against literal `%`, `_` and `\` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`. The backslash loses its special meaning (i.e. is interpreted literally) if it prepends a character different than `%`, `_` or `\`. @@ -1005,7 +1003,7 @@ Alias: `haystack NOT ILIKE pattern` (operator) ## ngramDistance -Calculates the 4-gram distance between a `haystack` string and a `needle` string. For this, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a [Float32](../../sql-reference/data-types/float.md/#float32-float64) between 0 and 1. The smaller the result is, the more similar the strings are to each other. +Calculates the 4-gram distance between a `haystack` string and a `needle` string. For this, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a [Float32](../data-types/float.md/#float32-float64) between 0 and 1. The smaller the result is, the more similar the strings are to each other. Functions [`ngramDistanceCaseInsensitive`](#ngramdistancecaseinsensitive), [`ngramDistanceUTF8`](#ngramdistanceutf8), [`ngramDistanceCaseInsensitiveUTF8`](#ngramdistancecaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function. @@ -1022,7 +1020,7 @@ ngramDistance(haystack, needle) **Returned value** -- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the similarity between the two strings. [Float32](../data-types/float.md/#float32-float64) **Implementation details** @@ -1076,7 +1074,7 @@ ngramDistanceCaseInsensitive(haystack, needle) **Returned value** -- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the similarity between the two strings. [Float32](../data-types/float.md/#float32-float64) **Examples** @@ -1125,7 +1123,7 @@ ngramDistanceUTF8(haystack, needle) **Returned value** -- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the similarity between the two strings. [Float32](../data-types/float.md/#float32-float64) **Example** @@ -1158,7 +1156,7 @@ ngramDistanceCaseInsensitiveUTF8(haystack, needle) **Returned value** -- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the similarity between the two strings. [Float32](../data-types/float.md/#float32-float64) **Example** @@ -1176,7 +1174,7 @@ Result: ## ngramSearch -Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. 
the number of n-grams from the needle minus the common number of n-grams normalized by the number of `needle` n-grams. Returns a [Float32](../../sql-reference/data-types/float.md/#float32-float64) between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function [`soundex`](../../sql-reference/functions/string-functions#soundex). +Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from the needle minus the common number of n-grams normalized by the number of `needle` n-grams. Returns a [Float32](../data-types/float.md/#float32-float64) between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function [`soundex`](../../sql-reference/functions/string-functions#soundex). Functions [`ngramSearchCaseInsensitive`](#ngramsearchcaseinsensitive), [`ngramSearchUTF8`](#ngramsearchutf8), [`ngramSearchCaseInsensitiveUTF8`](#ngramsearchcaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function. @@ -1193,7 +1191,7 @@ ngramSearch(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../data-types/float.md/#float32-float64) **Implementation details** @@ -1232,7 +1230,7 @@ ngramSearchCaseInsensitive(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../data-types/float.md/#float32-float64) The bigger the result is, the more likely `needle` is in the `haystack`. @@ -1267,7 +1265,7 @@ ngramSearchUTF8(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../data-types/float.md/#float32-float64) The bigger the result is, the more likely `needle` is in the `haystack`. @@ -1302,7 +1300,7 @@ ngramSearchCaseInsensitiveUTF8(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../data-types/float.md/#float32-float64) The bigger the result is, the more likely `needle` is in the `haystack`. @@ -1336,13 +1334,11 @@ countSubstrings(haystack, needle[, start_pos]) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. 
-**Returned values** +**Returned value** -- The number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../data-types/int-uint.md). **Examples** @@ -1385,13 +1381,11 @@ countSubstringsCaseInsensitive(haystack, needle[, start_pos]) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. -**Returned values** +**Returned value** -- The number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../data-types/int-uint.md). **Examples** @@ -1439,13 +1433,11 @@ countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos]) - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. -**Returned values** +**Returned value** -- The number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../data-types/int-uint.md). **Examples** @@ -1492,13 +1484,11 @@ countMatches(haystack, pattern) **Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md). +- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../data-types/string.md). **Returned value** -- The number of matches. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of matches. [UInt64](../data-types/int-uint.md). **Examples** @@ -1539,13 +1529,11 @@ countMatchesCaseInsensitive(haystack, pattern) **Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md). +- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../data-types/string.md). **Returned value** -- The number of matches. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of matches. [UInt64](../data-types/int-uint.md). **Examples** @@ -1579,13 +1567,11 @@ Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`. - `haystack` — String, in which regexp pattern will to be matched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `pattern` — String, regexp expression, must be constant. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `index` – An integer number greater or equal 0 with default 1. It represents which regex group to extract. 
[UInt or Int](../../sql-reference/data-types/int-uint.md). Optional. +- `index` – An integer number greater or equal 0 with default 1. It represents which regex group to extract. [UInt or Int](../data-types/int-uint.md). Optional. -**Returned values** +**Returned value** -`pattern` may contain multiple regexp groups, `index` indicates which regex group to extract. An index of 0 means matching the entire regular expression. - -Type: `String`. +`pattern` may contain multiple regexp groups, `index` indicates which regex group to extract. An index of 0 means matching the entire regular expression. [String](../data-types/string.md). **Examples** @@ -1622,12 +1608,9 @@ hasSubsequence(haystack, needle) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -**Returned values** +**Returned value** -- 1, if needle is a subsequence of haystack. -- 0, otherwise. - -Type: `UInt8`. +- 1, if needle is a subsequence of haystack, 0 otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -1660,12 +1643,9 @@ hasSubsequenceCaseInsensitive(haystack, needle) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -**Returned values** +**Returned value** -- 1, if needle is a subsequence of haystack. -- 0, otherwise. - -Type: `UInt8`. +- 1, if needle is a subsequence of haystack, 0 otherwise [UInt8](../data-types/int-uint.md). **Examples** @@ -1698,12 +1678,9 @@ hasSubsequenceUTF8(haystack, needle) - `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal). -**Returned values** +**Returned value** -- 1, if needle is a subsequence of haystack. -- 0, otherwise. - -Type: `UInt8`. +- 1, if needle is a subsequence of haystack, 0, otherwise. [UInt8](../data-types/int-uint.md). Query: @@ -1736,12 +1713,9 @@ hasSubsequenceCaseInsensitiveUTF8(haystack, needle) - `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal). -**Returned values** +**Returned value** -- 1, if needle is a subsequence of haystack. -- 0, otherwise. - -Type: `UInt8`. +- 1, if needle is a subsequence of haystack, 0 otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -1776,8 +1750,7 @@ hasToken(haystack, token) **Returned value** -- 1, if the token is present in the haystack. -- 0, if the token is not present. +- 1, if the token is present in the haystack, 0 otherwise. [UInt8](../data-types/int-uint.md). **Implementation details** @@ -1812,9 +1785,7 @@ hasTokenOrNull(haystack, token) **Returned value** -- 1, if the token is present in the haystack. -- 0, if the token is not present in the haystack. -- null, if the token is ill-formed. +- 1, if the token is present in the haystack, 0 if it is not present, and null if the token is ill formed. **Implementation details** @@ -1851,8 +1822,7 @@ hasTokenCaseInsensitive(haystack, token) **Returned value** -- 1, if the token is present in the haystack. -- 0, otherwise. 
+- 1, if the token is present in the haystack, 0 otherwise. [UInt8](../data-types/int-uint.md). **Implementation details** @@ -1887,9 +1857,7 @@ hasTokenCaseInsensitiveOrNull(haystack, token) **Returned value** -- 1, if the token is present in the haystack. -- 0, if token is not present. -- null, if the token is ill-formed. +- 1, if the token is present in the haystack, 0 if the token is not present, otherwise [`null`](../data-types/nullable.md) if the token is ill-formed. [UInt8](../data-types/int-uint.md). **Implementation details** diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md index e80a3fa9860..ce5dea14ec5 100644 --- a/docs/en/sql-reference/functions/time-series-functions.md +++ b/docs/en/sql-reference/functions/time-series-functions.md @@ -30,9 +30,7 @@ At least four data points are required in `series` to detect outliers. **Returned value** -- Returns an array of the same length as the input array where each value represents score of possible anomaly of corresponding element in the series. A non-zero score indicates a possible anomaly. - -Type: [Array](../../sql-reference/data-types/array.md). +- Returns an array of the same length as the input array where each value represents score of possible anomaly of corresponding element in the series. A non-zero score indicates a possible anomaly. [Array](../data-types/array.md). **Examples** @@ -81,10 +79,7 @@ seriesPeriodDetectFFT(series); **Returned value** -- A real value equal to the period of series data -- Returns NAN when number of data points are less than four. - -Type: [Float64](../../sql-reference/data-types/float.md). +- A real value equal to the period of series data. NaN when number of data points are less than four. [Float64](../data-types/float.md). **Examples** @@ -134,9 +129,7 @@ The number of data points in `series` should be at least twice the value of `per **Returned value** - An array of four arrays where the first array include seasonal components, the second array - trend, -the third array - residue component, and the fourth array - baseline(seasonal + trend) component. - -Type: [Array](../../sql-reference/data-types/array.md). +the third array - residue component, and the fourth array - baseline(seasonal + trend) component. [Array](../data-types/array.md). **Examples** diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index d8f23c92e61..2cec1987c20 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -17,15 +17,13 @@ tumble(time_attr, interval [, timezone]) ``` **Arguments** -- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. -- `interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. +- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. +- `interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding tumbling window. - -Type: `Tuple(DateTime, DateTime)` +- The inclusive lower and exclusive upper bound of the corresponding tumbling window. 
[Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. **Example** @@ -53,16 +51,14 @@ hop(time_attr, hop_interval, window_interval [, timezone]) **Arguments** -- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. -- `hop_interval` - Hop interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. -- `window_interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. +- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. +- `hop_interval` - Hop interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. +- `window_interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. - -Type: `Tuple(DateTime, DateTime)` +- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. **Example** diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 64b1732597f..0663be08240 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -7,15 +7,15 @@ sidebar_label: Tuples ## tuple A function that allows grouping multiple columns. -For columns with the types T1, T2, …, it returns a Tuple(T1, T2, …) type tuple containing these columns. There is no cost to execute the function. +For columns with the types T1, T2, ..., it returns a Tuple(T1, T2, ...) type tuple containing these columns. There is no cost to execute the function. Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table. -The function implements the operator `(x, y, …)`. +The function implements the operator `(x, y, ...)`. **Syntax** ``` sql -tuple(x, y, …) +tuple(x, y, ...) ``` ## tupleElement @@ -35,7 +35,7 @@ tupleElement(tuple, name, [, default_value]) ## untuple -Performs syntactic substitution of [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) elements in the call location. +Performs syntactic substitution of [tuple](../data-types/tuple.md#tuplet1-t2) elements in the call location. The names of the result columns are implementation-specific and subject to change. Do not assume specific column names after `untuple`. @@ -49,7 +49,7 @@ You can use the `EXCEPT` expression to skip columns as a result of the query. **Arguments** -- `x` — A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md). 
+- `x` — A `tuple` function, column, or tuple of elements. [Tuple](../data-types/tuple.md). **Returned value** @@ -111,7 +111,7 @@ Result: **See Also** -- [Tuple](../../sql-reference/data-types/tuple.md) +- [Tuple](../data-types/tuple.md) ## tupleHammingDistance @@ -125,8 +125,8 @@ tupleHammingDistance(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). Tuples should have the same type of the elements. @@ -134,7 +134,9 @@ Tuples should have the same type of the elements. - The Hamming distance. -Type: The result type is calculated the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples. +:::note +The result type is calculated the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples. +::: ``` sql SELECT @@ -196,13 +198,11 @@ tupleToNameValuePairs(tuple) **Arguments** -- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values. +- `tuple` — Named tuple. [Tuple](../data-types/tuple.md) with any types of values. **Returned value** -- An array with (name, value) pairs. - -Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)). +- An array with (name, value) pairs. [Array](../data-types/array.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md), ...)). **Example** @@ -273,14 +273,12 @@ Alias: `vectorSum`. **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the sum. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the sum. [Tuple](../data-types/tuple.md). **Example** @@ -312,14 +310,12 @@ Alias: `vectorDifference`. **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the result of subtraction. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of subtraction. [Tuple](../data-types/tuple.md). **Example** @@ -349,14 +345,12 @@ tupleMultiply(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the multiplication. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the multiplication. [Tuple](../data-types/tuple.md). **Example** @@ -386,14 +380,12 @@ tupleDivide(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. 
[Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the result of division. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of division. [Tuple](../data-types/tuple.md). **Example** @@ -423,13 +415,11 @@ tupleNegate(tuple) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple` — [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the result of negation. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of negation. [Tuple](../data-types/tuple.md). **Example** @@ -459,14 +449,12 @@ tupleMultiplyByNumber(tuple, number) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). -- `number` — Multiplier. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `tuple` — [Tuple](../data-types/tuple.md). +- `number` — Multiplier. [Int/UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- Tuple with multiplied values. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with multiplied values. [Tuple](../data-types/tuple.md). **Example** @@ -496,14 +484,12 @@ tupleDivideByNumber(tuple, number) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). -- `number` — Divider. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `tuple` — [Tuple](../data-types/tuple.md). +- `number` — Divider. [Int/UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- Tuple with divided values. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with divided values. [Tuple](../data-types/tuple.md). **Example** @@ -531,7 +517,7 @@ tupleConcat(tuples) **Arguments** -- `tuples` – Arbitrary number of arguments of [Tuple](../../sql-reference/data-types/tuple.md) type. +- `tuples` – Arbitrary number of arguments of [Tuple](../data-types/tuple.md) type. **Example** diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 377283bc006..d9c18e2a0a2 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -6,7 +6,7 @@ sidebar_label: Maps ## map -Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types/map.md) data type. +Arranges `key:value` pairs into [Map(key, value)](../data-types/map.md) data type. **Syntax** @@ -16,14 +16,12 @@ map(key1, value1[, key2, value2, ...]) **Arguments** -- `key` — The key part of the pair. Arbitrary type, except [Nullable](../../sql-reference/data-types/nullable.md) and [LowCardinality](../../sql-reference/data-types/lowcardinality.md) nested with [Nullable](../../sql-reference/data-types/nullable.md). -- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md). +- `key` — The key part of the pair. 
Arbitrary type, except [Nullable](../data-types/nullable.md) and [LowCardinality](../data-types/lowcardinality.md) nested with [Nullable](../data-types/nullable.md). +- `value` — The value part of the pair. Arbitrary type, including [Map](../data-types/map.md) and [Array](../data-types/array.md). **Returned value** -- Data structure as `key:value` pairs. - -Type: [Map(key, value)](../../sql-reference/data-types/map.md). +- Data structure as `key:value` pairs. [Map(key, value)](../data-types/map.md). **Examples** @@ -63,11 +61,11 @@ Result: **See Also** -- [Map(key, value)](../../sql-reference/data-types/map.md) data type +- [Map(key, value)](../data-types/map.md) data type ## mapFromArrays -Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md). Notice that the second argument could also be a [Map](../../sql-reference/data-types/map.md), thus it is casted to an Array when executing. +Merges an [Array](../data-types/array.md) of keys and an [Array](../data-types/array.md) of values into a [Map(key, value)](../data-types/map.md). Notice that the second argument could also be a [Map](../data-types/map.md), thus it is casted to an Array when executing. The function is a more convenient alternative to `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`. @@ -83,7 +81,7 @@ Alias: `MAP_FROM_ARRAYS(keys, values)` **Arguments** -- `keys` — Given key array to create a map from. The nested type of array must be: [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md) +- `keys` — Given key array to create a map from. The nested type of array must be: [String](../data-types/string.md), [Integer](../data-types/int-uint.md), [LowCardinality](../data-types/lowcardinality.md), [FixedString](../data-types/fixedstring.md), [UUID](../data-types/uuid.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [Date32](../data-types/date32.md), [Enum](../data-types/enum.md) - `values` - Given value array or map to create a map from. **Returned value** @@ -111,7 +109,7 @@ SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3)) ## extractKeyValuePairs -Extracts key-value pairs, i.e. a [Map(String, String)](../../sql-reference/data-types/map.md), from a string. Parsing is robust towards noise (e.g. log files). +Extracts key-value pairs, i.e. a [Map(String, String)](../data-types/map.md), from a string. Parsing is robust towards noise (e.g. log files). A key-value pair consists of a key, followed by a `key_value_delimiter` and a value. Key value pairs must be separated by `pair_delimiter`. Quoted keys and values are also supported. @@ -127,14 +125,14 @@ Alias: **Arguments** -- `data` - String to extract key-value pairs from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). 
-- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `pair_delimiters` - Set of character to be used as delimiters between pairs. Defaults to ` `, `,` and `;`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `data` - String to extract key-value pairs from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `pair_delimiters` - Set of character to be used as delimiters between pairs. Defaults to ` `, `,` and `;`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned values** -- A [Map(String, String)](../../sql-reference/data-types/map.md) of key-value pairs. +- A [Map(String, String)](../data-types/map.md) of key-value pairs. **Examples** @@ -223,11 +221,11 @@ mapAdd(arg1, arg2 [, ...]) **Arguments** -Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promoted to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. +Arguments are [maps](../data-types/map.md) or [tuples](../data-types/tuple.md#tuplet1-t2) of two [arrays](../data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promoted to the one type ([Int64](../data-types/int-uint.md#int-ranges), [UInt64](../data-types/int-uint.md#uint-ranges) or [Float64](../data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. **Returned value** -- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. +- Depending on the arguments returns one [map](../data-types/map.md) or [tuple](../data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. 
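As a sketch of the promotion rule above (the key/value literals and aliases are arbitrary): summing two maps whose values are `UInt8` produces result values of the promoted unsigned 64-bit type.

```sql
-- Values are summed per key; the UInt8 values are promoted in the result, as described above.
SELECT
    mapAdd(map(1, toUInt8(10)), map(1, toUInt8(5), 2, toUInt8(7))) AS summed,
    toTypeName(summed) AS result_type
```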
**Example** @@ -271,11 +269,11 @@ mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...]) **Arguments** -Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. +Arguments are [maps](../data-types/map.md) or [tuples](../data-types/tuple.md#tuplet1-t2) of two [arrays](../data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../data-types/int-uint.md#int-ranges), [UInt64](../data-types/int-uint.md#uint-ranges) or [Float64](../data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. **Returned value** -- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. +- Depending on the arguments returns one [map](../data-types/map.md) or [tuple](../data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. **Example** @@ -324,21 +322,21 @@ For array arguments the number of elements in `keys` and `values` must be the sa **Arguments** -Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represent keys, and the second array contains values for the each key. +Arguments are [maps](../data-types/map.md) or two [arrays](../data-types/array.md#data-type-array), where the first array represent keys, and the second array contains values for the each key. Mapped arrays: -- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). -- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). -- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../../sql-reference/data-types/int-uint.md#int-ranges). +- `keys` — Array of keys. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)). +- `values` — Array of values. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)). +- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../data-types/int-uint.md#int-ranges). or -- `map` — Map with integer keys. [Map](../../sql-reference/data-types/map.md). +- `map` — Map with integer keys. [Map](../data-types/map.md). 
**Returned value** -- Depending on the arguments returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys. +- Depending on the arguments returns a [map](../data-types/map.md) or a [tuple](../data-types/tuple.md#tuplet1-t2) of two [arrays](../data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys. **Example** @@ -382,14 +380,12 @@ mapContains(map, key) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). - `key` — Key. Type matches the type of keys of `map` parameter. **Returned value** -- `1` if `map` contains `key`, `0` if not. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `map` contains `key`, `0` if not. [UInt8](../data-types/int-uint.md). **Example** @@ -417,7 +413,7 @@ Result: Returns all keys from the `map` parameter. -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [keys](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapKeys(m) FROM table` transforms to `SELECT m.keys FROM table`. +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [keys](../data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapKeys(m) FROM table` transforms to `SELECT m.keys FROM table`. **Syntax** @@ -427,13 +423,11 @@ mapKeys(map) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). **Returned value** -- Array containing all keys from the `map`. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array containing all keys from the `map`. [Array](../data-types/array.md). **Example** @@ -460,7 +454,7 @@ Result: Returns all values from the `map` parameter. -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [values](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapValues(m) FROM table` transforms to `SELECT m.values FROM table`. +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [values](../data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapValues(m) FROM table` transforms to `SELECT m.values FROM table`. **Syntax** @@ -470,13 +464,11 @@ mapValues(map) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). **Returned value** -- Array containing all the values from `map`. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array containing all the values from `map`. 
[Array](../data-types/array.md). **Example** @@ -508,7 +500,7 @@ mapContainsKeyLike(map, pattern) ``` **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). - `pattern` - String pattern to match. **Returned value** @@ -546,7 +538,7 @@ mapExtractKeyLike(map, pattern) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). - `pattern` - String pattern to match. **Returned value** @@ -585,11 +577,11 @@ mapApply(func, map) **Arguments** - `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). -- `map` — [Map](../../sql-reference/data-types/map.md). +- `map` — [Map](../data-types/map.md). **Returned value** -- Returns a map obtained from the original map by application of `func(map1[i], …, mapN[i])` for each element. +- Returns a map obtained from the original map by application of `func(map1[i], ..., mapN[i])` for each element. **Example** @@ -625,11 +617,11 @@ mapFilter(func, map) **Arguments** - `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). -- `map` — [Map](../../sql-reference/data-types/map.md). +- `map` — [Map](../data-types/map.md). **Returned value** -- Returns a map containing only the elements in `map` for which `func(map1[i], …, mapN[i])` returns something other than 0. +- Returns a map containing only the elements in `map` for which `func(map1[i], ..., mapN[i])` returns something other than 0. **Example** @@ -666,8 +658,8 @@ mapUpdate(map1, map2) **Arguments** -- `map1` [Map](../../sql-reference/data-types/map.md). -- `map2` [Map](../../sql-reference/data-types/map.md). +- `map1` [Map](../data-types/map.md). +- `map2` [Map](../data-types/map.md). **Returned value** @@ -699,7 +691,7 @@ mapConcat(maps) **Arguments** -- `maps` – Arbitrary number of arguments of [Map](../../sql-reference/data-types/map.md) type. +- `maps` – Arbitrary number of arguments of [Map](../data-types/map.md) type. **Returned value** diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index ea08ffa50e7..5dd1d5ceebe 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -51,7 +51,7 @@ SETTINGS cast_keep_nullable = 1 ## toInt(8\|16\|32\|64\|128\|256) -Converts an input value to a value the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: +Converts an input value to a value the [Int](../data-types/int-uint.md) data type. This function family includes: - `toInt8(expr)` — Converts to a value of data type `Int8`. - `toInt16(expr)` — Converts to a value of data type `Int16`. @@ -62,7 +62,7 @@ Converts an input value to a value the [Int](/docs/en/sql-reference/data-types/i **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](../syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. 
**Returned value** @@ -70,7 +70,7 @@ Integer value in the `Int8`, `Int16`, `Int32`, `Int64`, `Int128` or `Int256` dat Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. -The behavior of functions for the [NaN and Inf](/docs/en/sql-reference/data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. +The behavior of functions for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. **Example** @@ -90,7 +90,7 @@ Result: ## toInt(8\|16\|32\|64\|128\|256)OrZero -Takes an argument of type [String](/docs/en/sql-reference/data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`. +Takes an argument of type [String](../data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`. **Example** @@ -151,7 +151,7 @@ Result: ## toUInt(8\|16\|32\|64\|256) -Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: +Converts an input value to the [UInt](../data-types/int-uint.md) data type. This function family includes: - `toUInt8(expr)` — Converts to a value of data type `UInt8`. - `toUInt16(expr)` — Converts to a value of data type `UInt16`. @@ -161,7 +161,7 @@ Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](../syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** @@ -169,7 +169,7 @@ Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. -The behavior of functions for negative arguments and for the [NaN and Inf](/docs/en/sql-reference/data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. +The behavior of functions for negative arguments and for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. **Example** @@ -203,9 +203,9 @@ Result: ## toDate -Converts the argument to [Date](/docs/en/sql-reference/data-types/date.md) data type. +Converts the argument to [Date](../data-types/date.md) data type. 
-If the argument is [DateTime](/docs/en/sql-reference/data-types/datetime.md) or [DateTime64](/docs/en/sql-reference/data-types/datetime64.md), it truncates it and leaves the date component of the DateTime: +If the argument is [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md), it truncates it and leaves the date component of the DateTime: ```sql SELECT @@ -219,7 +219,7 @@ SELECT └─────────────────────┴───────────────┘ ``` -If the argument is a [String](/docs/en/sql-reference/data-types/string.md), it is parsed as [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). If it was parsed as [DateTime](/docs/en/sql-reference/data-types/datetime.md), the date component is being used: +If the argument is a [String](../data-types/string.md), it is parsed as [Date](../data-types/date.md) or [DateTime](../data-types/datetime.md). If it was parsed as [DateTime](../data-types/datetime.md), the date component is being used: ```sql SELECT @@ -247,7 +247,7 @@ SELECT └────────────┴───────────────────────────────────────────┘ ``` -If the argument is a number and looks like a UNIX timestamp (is greater than 65535), it is interpreted as a [DateTime](/docs/en/sql-reference/data-types/datetime.md), then truncated to [Date](/docs/en/sql-reference/data-types/date.md) in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to [Date](/docs/en/sql-reference/data-types/date.md) depends on the timezone: +If the argument is a number and looks like a UNIX timestamp (is greater than 65535), it is interpreted as a [DateTime](../data-types/datetime.md), then truncated to [Date](../data-types/date.md) in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to [Date](../data-types/date.md) depends on the timezone: ```sql SELECT @@ -276,7 +276,7 @@ date_Samoa_2: 2022-12-31 The example above demonstrates how the same UNIX timestamp can be interpreted as different dates in different time zones. -If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (the first UNIX day) and converted to [Date](/docs/en/sql-reference/data-types/date.md). It corresponds to the internal numeric representation of the `Date` data type. Example: +If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (the first UNIX day) and converted to [Date](../data-types/date.md). It corresponds to the internal numeric representation of the `Date` data type. Example: ```sql SELECT toDate(12345) @@ -317,7 +317,7 @@ SELECT ## toDateOrZero -The same as [toDate](#todate) but returns lower boundary of [Date](/docs/en/sql-reference/data-types/date.md) if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported. +The same as [toDate](#todate) but returns lower boundary of [Date](../data-types/date.md) if an invalid argument is received. Only [String](../data-types/string.md) argument is supported. **Example** @@ -338,7 +338,7 @@ Result: ## toDateOrNull -The same as [toDate](#todate) but returns `NULL` if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported. +The same as [toDate](#todate) but returns `NULL` if an invalid argument is received. Only [String](../data-types/string.md) argument is supported. 
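A small sketch contrasting the `OrZero` and `OrNull` fallbacks on unparsable input (illustrative only; the expected values follow from the descriptions above):

```sql
-- toDateOrZero falls back to the lower Date boundary, toDateOrNull to NULL.
SELECT
    toDateOrZero('not-a-date') AS or_zero,  -- expected: 1970-01-01
    toDateOrNull('not-a-date') AS or_null;  -- expected: NULL
```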
**Example** @@ -359,7 +359,7 @@ Result: ## toDateOrDefault -Like [toDate](#todate) but if unsuccessful, returns a default value which is either the second argument (if specified), or otherwise the lower boundary of [Date](/docs/en/sql-reference/data-types/date.md). +Like [toDate](#todate) but if unsuccessful, returns a default value which is either the second argument (if specified), or otherwise the lower boundary of [Date](../data-types/date.md). **Syntax** @@ -386,7 +386,7 @@ Result: ## toDateTime -Converts an input value to [DateTime](/docs/en/sql-reference/data-types/datetime.md). +Converts an input value to [DateTime](../data-types/datetime.md). **Syntax** @@ -396,18 +396,18 @@ toDateTime(expr[, time_zone ]) **Arguments** -- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [Int](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). -- `time_zone` — Time zone. [String](/docs/en/sql-reference/data-types/string.md). +- `expr` — The value. [String](../data-types/string.md), [Int](../data-types/int-uint.md), [Date](../data-types/date.md) or [DateTime](../data-types/datetime.md). +- `time_zone` — Time zone. [String](../data-types/string.md). :::note If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp). -If `expr` is a [String](/docs/en/sql-reference/data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time. +If `expr` is a [String](../data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time. Thus, parsing of short numbers' string representations (up to 4 digits) is explicitly disabled due to ambiguity, e.g. a string `'1999'` may be both a year (an incomplete string representation of Date / DateTime) or a unix timestamp. Longer numeric strings are allowed. ::: **Returned value** -- A date time. [DateTime](/docs/en/sql-reference/data-types/datetime.md) +- A date time. [DateTime](../data-types/datetime.md) **Example** @@ -428,7 +428,7 @@ Result: ## toDateTimeOrZero -The same as [toDateTime](#todatetime) but returns lower boundary of [DateTime](/docs/en/sql-reference/data-types/datetime.md) if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported. +The same as [toDateTime](#todatetime) but returns lower boundary of [DateTime](../data-types/datetime.md) if an invalid argument is received. Only [String](../data-types/string.md) argument is supported. **Example** @@ -449,7 +449,7 @@ Result: ## toDateTimeOrNull -The same as [toDateTime](#todatetime) but returns `NULL` if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported. +The same as [toDateTime](#todatetime) but returns `NULL` if an invalid argument is received. Only [String](../data-types/string.md) argument is supported. **Example** @@ -470,7 +470,7 @@ Result: ## toDateTimeOrDefault -Like [toDateTime](#todatetime) but if unsuccessful, returns a default value which is either the third argument (if specified), or otherwise the lower boundary of [DateTime](/docs/en/sql-reference/data-types/datetime.md). +Like [toDateTime](#todatetime) but if unsuccessful, returns a default value which is either the third argument (if specified), or otherwise the lower boundary of [DateTime](../data-types/datetime.md). 
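A hedged sketch of the fallback behaviour (the time zone and default value used here are arbitrary choices, not from the original page); the third argument, if given, is returned when parsing fails:

```sql
SELECT
    toDateTimeOrDefault('2023-02-24 12:00:00', 'UTC') AS parsed,
    toDateTimeOrDefault('invalid', 'UTC', toDateTime('2023-01-01 00:00:00', 'UTC')) AS fallback;
-- parsed   -> 2023-02-24 12:00:00
-- fallback -> 2023-01-01 00:00:00 (the supplied default)
```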
**Syntax** @@ -497,7 +497,7 @@ Result: ## toDate32 -Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, it's borders are taken into account. +Converts the argument to the [Date32](../data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](../data-types/date32.md). If the argument has [Date](../data-types/date.md) type, it's borders are taken into account. **Syntax** @@ -507,11 +507,11 @@ toDate32(expr) **Arguments** -- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md) or [Date](/docs/en/sql-reference/data-types/date.md). +- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md) or [Date](../data-types/date.md). **Returned value** -- A calendar date. Type [Date32](/docs/en/sql-reference/data-types/date32.md). +- A calendar date. Type [Date32](../data-types/date32.md). **Example** @@ -539,7 +539,7 @@ SELECT toDate32('1899-01-01') AS value, toTypeName(value); └────────────┴────────────────────────────────────┘ ``` -3. With [Date](/docs/en/sql-reference/data-types/date.md) argument: +3. With [Date](../data-types/date.md) argument: ``` sql SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value); @@ -553,7 +553,7 @@ SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value); ## toDate32OrZero -The same as [toDate32](#todate32) but returns the min value of [Date32](/docs/en/sql-reference/data-types/date32.md) if an invalid argument is received. +The same as [toDate32](#todate32) but returns the min value of [Date32](../data-types/date32.md) if an invalid argument is received. **Example** @@ -593,7 +593,7 @@ Result: ## toDate32OrDefault -Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, it's borders are taken into account. Returns default value if an invalid argument is received. +Converts the argument to the [Date32](../data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by [Date32](../data-types/date32.md). If the argument has [Date](../data-types/date.md) type, it's borders are taken into account. Returns default value if an invalid argument is received. **Example** @@ -615,7 +615,7 @@ Result: ## toDateTime64 -Converts the argument to the [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type. +Converts the argument to the [DateTime64](../data-types/datetime64.md) data type. **Syntax** @@ -625,15 +625,13 @@ toDateTime64(expr, scale, [timezone]) **Arguments** -- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). +- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md), [Float](../data-types/float.md) or [DateTime](../data-types/datetime.md). 
- `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. - `timezone` - Time zone of the specified datetime64 object. **Returned value** -- A calendar date and time of day, with sub-second precision. - -Type: [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). +- A calendar date and time of day, with sub-second precision. [DateTime64](../data-types/datetime64.md). **Example** @@ -694,7 +692,7 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN ## toDecimal(32\|64\|128\|256) -Converts `value` to the [Decimal](/docs/en/sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. +Converts `value` to the [Decimal](../data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. - `toDecimal32(value, S)` - `toDecimal64(value, S)` @@ -703,7 +701,7 @@ Converts `value` to the [Decimal](/docs/en/sql-reference/data-types/decimal.md) ## toDecimal(32\|64\|128\|256)OrNull -Converts an input string to a [Nullable(Decimal(P,S))](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes: +Converts an input string to a [Nullable(Decimal(P,S))](../data-types/decimal.md) data type value. This family of functions includes: - `toDecimal32OrNull(expr, S)` — Results in `Nullable(Decimal32(S))` data type. - `toDecimal64OrNull(expr, S)` — Results in `Nullable(Decimal64(S))` data type. @@ -714,7 +712,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -757,7 +755,7 @@ Result: ## toDecimal(32\|64\|128\|256)OrDefault -Converts an input string to a [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes: +Converts an input string to a [Decimal(P,S)](../data-types/decimal.md) data type value. This family of functions includes: - `toDecimal32OrDefault(expr, S)` — Results in `Decimal32(S)` data type. - `toDecimal64OrDefault(expr, S)` — Results in `Decimal64(S)` data type. @@ -768,7 +766,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. 
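A brief, hedged sketch of how the fallback of the `OrDefault` variants kicks in (the values are chosen for illustration and are not from the original page):

```sql
SELECT
    toDecimal32OrDefault('1.11', 2) AS parsed,    -- expected: 1.11
    toDecimal32OrDefault('abc', 2)  AS fallback;  -- expected: 0, the default value of Decimal32(2)
```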
**Returned value** @@ -810,7 +808,7 @@ Result: ## toDecimal(32\|64\|128\|256)OrZero -Converts an input value to the [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type. This family of functions includes: +Converts an input value to the [Decimal(P,S)](../data-types/decimal.md) data type. This family of functions includes: - `toDecimal32OrZero( expr, S)` — Results in `Decimal32(S)` data type. - `toDecimal64OrZero( expr, S)` — Results in `Decimal64(S)` data type. @@ -821,7 +819,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -921,7 +919,7 @@ Also see the `toUnixTimestamp` function. ## toFixedString(s, N) -Converts a [String](/docs/en/sql-reference/data-types/string.md) type argument to a [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) type (a string of fixed length N). +Converts a [String](../data-types/string.md) type argument to a [FixedString(N)](../data-types/fixedstring.md) type (a string of fixed length N). If the string has fewer bytes than N, it is padded with null bytes to the right. If the string has more bytes than N, an exception is thrown. ## toStringCutToZero(s) @@ -970,14 +968,14 @@ toDecimalString(number, scale) **Arguments** -- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md), -- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md). - * Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal), - * Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60. +- `number` — Value to be represented as String, [Int, UInt](../data-types/int-uint.md), [Float](../data-types/float.md), [Decimal](../data-types/decimal.md), +- `scale` — Number of fractional digits, [UInt8](../data-types/int-uint.md). + * Maximum scale for [Decimal](../data-types/decimal.md) and [Int, UInt](../data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal), + * Maximum scale for [Float](../data-types/float.md) is 60. **Returned value** -- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale). +- Input value represented as [String](../data-types/string.md) with given number of fractional digits (scale). The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale. 
**Example** @@ -996,33 +994,689 @@ Result: └─────────────────────────────────────────────┘ ``` -## reinterpretAsUInt(8\|16\|32\|64) +## reinterpretAsUInt8 -## reinterpretAsInt(8\|16\|32\|64) +Performs byte reinterpretation by treating the input value as a value of type UInt8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. -## reinterpretAsFloat(32\|64) +**Syntax** + +```sql +reinterpretAsUInt8(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt8. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as UInt8. [UInt8](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toInt8(257) AS x, + toTypeName(x), + reinterpretAsUInt8(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ Int8 │ 1 │ UInt8 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt16 + +Performs byte reinterpretation by treating the input value as a value of type UInt16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsUInt16(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt16. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as UInt16. [UInt16](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toUInt8(257) AS x, + toTypeName(x), + reinterpretAsUInt16(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ UInt8 │ 1 │ UInt16 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt32 + +Performs byte reinterpretation by treating the input value as a value of type UInt32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsUInt32(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt32. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as UInt32. [UInt32](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). 
+ +**Example** + +Query: + +```sql +SELECT + toUInt16(257) AS x, + toTypeName(x), + reinterpretAsUInt32(x) AS res, + toTypeName(res) +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ UInt16 │ 257 │ UInt32 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt64 + +Performs byte reinterpretation by treating the input value as a value of type UInt64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsUInt64(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as UInt64. [UInt64](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toUInt32(257) AS x, + toTypeName(x), + reinterpretAsUInt64(x) AS res, + toTypeName(res) +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ UInt32 │ 257 │ UInt64 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt128 + +Performs byte reinterpretation by treating the input value as a value of type UInt128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsUInt128(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt128. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as UInt128. [UInt128](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toUInt64(257) AS x, + toTypeName(x), + reinterpretAsUInt128(x) AS res, + toTypeName(res) +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ UInt64 │ 257 │ UInt128 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt256 + +Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsUInt256(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt256. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as UInt256. [UInt256](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). 
+ +**Example** + +Query: + +```sql +SELECT + toUInt128(257) AS x, + toTypeName(x), + reinterpretAsUInt256(x) AS res, + toTypeName(res) +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ UInt128 │ 257 │ UInt256 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt8 + +Performs byte reinterpretation by treating the input value as a value of type Int8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsInt8(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int8. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Int8. [Int8](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toUInt8(257) AS x, + toTypeName(x), + reinterpretAsInt8(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ UInt8 │ 1 │ Int8 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt16 + +Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsInt16(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int16. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Int16. [Int16](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toInt8(257) AS x, + toTypeName(x), + reinterpretAsInt16(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ Int8 │ 1 │ Int16 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt32 + +Performs byte reinterpretation by treating the input value as a value of type Int32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsInt32(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int32. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Int32. [Int32](../data-types/int-uint.md/#int-ranges). 
+ +**Example** + +Query: + +```sql +SELECT + toInt16(257) AS x, + toTypeName(x), + reinterpretAsInt32(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int16 │ 257 │ Int32 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt64 + +Performs byte reinterpretation by treating the input value as a value of type Int64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsInt64(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Int64. [Int64](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toInt32(257) AS x, + toTypeName(x), + reinterpretAsInt64(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int32 │ 257 │ Int64 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt128 + +Performs byte reinterpretation by treating the input value as a value of type Int128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsInt128(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int128. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Int128. [Int128](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toInt64(257) AS x, + toTypeName(x), + reinterpretAsInt128(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int64 │ 257 │ Int128 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt256 + +Performs byte reinterpretation by treating the input value as a value of type Int256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsInt256(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int256. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Int256. [Int256](../data-types/int-uint.md/#int-ranges). 
+ +**Example** + +Query: + +```sql +SELECT + toInt128(257) AS x, + toTypeName(x), + reinterpretAsInt256(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int128 │ 257 │ Int256 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsFloat32 + +Performs byte reinterpretation by treating the input value as a value of type Float32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsFloat32(x) +``` + +**Parameters** + +- `x`: value to reinterpret as Float32. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Float32. [Float32](../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT reinterpretAsUInt32(toFloat32(0.2)) as x, reinterpretAsFloat32(x); +``` + +Result: + +```response +┌──────────x─┬─reinterpretAsFloat32(x)─┐ +│ 1045220557 │ 0.2 │ +└────────────┴─────────────────────────┘ +``` + +## reinterpretAsFloat64 + +Performs byte reinterpretation by treating the input value as a value of type Float64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsFloat64(x) +``` + +**Parameters** + +- `x`: value to reinterpret as Float64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Float64. [Float64](../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT reinterpretAsUInt64(toFloat64(0.2)) as x, reinterpretAsFloat64(x); +``` + +Result: + +```response +┌───────────────────x─┬─reinterpretAsFloat64(x)─┐ +│ 4596373779694328218 │ 0.2 │ +└─────────────────────┴─────────────────────────┘ +``` ## reinterpretAsDate +Accepts a string, fixed string or numeric value and interprets the bytes as a number in host order (little endian). It returns a date from the interpreted number as the number of days since the beginning of the Unix Epoch. + +**Syntax** + +```sql +reinterpretAsDate(x) +``` + +**Parameters** + +- `x`: number of days since the beginning of the Unix Epoch. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Date. [Date](../data-types/date.md). + +**Implementation details** + +:::note +If the provided string isn’t long enough, the function works as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. 
+::: + +**Example** + +Query: + +```sql +SELECT reinterpretAsDate(65), reinterpretAsDate('A'); +``` + +Result: + +```response +┌─reinterpretAsDate(65)─┬─reinterpretAsDate('A')─┐ +│ 1970-03-07 │ 1970-03-07 │ +└───────────────────────┴────────────────────────┘ +``` + ## reinterpretAsDateTime -These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn’t long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch. +These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). Returns a date with time interpreted as the number of seconds since the beginning of the Unix Epoch. + +**Syntax** + +```sql +reinterpretAsDateTime(x) +``` + +**Parameters** + +- `x`: number of seconds since the beginning of the Unix Epoch. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Date and Time. [DateTime](../data-types/datetime.md). + +**Implementation details** + +:::note +If the provided string isn’t long enough, the function works as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. +::: + +**Example** + +Query: + +```sql +SELECT reinterpretAsDateTime(65), reinterpretAsDateTime('A'); +``` + +Result: + +```response +┌─reinterpretAsDateTime(65)─┬─reinterpretAsDateTime('A')─┐ +│ 1970-01-01 01:01:05 │ 1970-01-01 01:01:05 │ +└───────────────────────────┴────────────────────────────┘ +``` ## reinterpretAsString -This function accepts a number or date or date with time and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. +This function accepts a number, date or date with time and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. + +**Syntax** + +```sql +reinterpretAsString(x) +``` + +**Parameters** + +- `x`: value to reinterpret to string. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md). + +**Returned value** + +- String containing bytes representing `x`. [String](../data-types/fixedstring.md). 
+ +**Example** + +Query: + +```sql +SELECT + reinterpretAsString(toDateTime('1970-01-01 01:01:05')), + reinterpretAsString(toDate('1970-03-07')); +``` + +Result: + +```response +┌─reinterpretAsString(toDateTime('1970-01-01 01:01:05'))─┬─reinterpretAsString(toDate('1970-03-07'))─┐ +│ A │ A │ +└────────────────────────────────────────────────────────┴───────────────────────────────────────────┘ +``` ## reinterpretAsFixedString -This function accepts a number or date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. +This function accepts a number, date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. + +**Syntax** + +```sql +reinterpretAsFixedString(x) +``` + +**Parameters** + +- `x`: value to reinterpret to string. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md). + +**Returned value** + +- Fixed string containing bytes representing `x`. [FixedString](../data-types/fixedstring.md). + +**Example** + +Query: + +```sql +SELECT + reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05')), + reinterpretAsFixedString(toDate('1970-03-07')); +``` + +Result: + +```response +┌─reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05'))─┬─reinterpretAsFixedString(toDate('1970-03-07'))─┐ +│ A │ A │ +└─────────────────────────────────────────────────────────────┴────────────────────────────────────────────────┘ +``` ## reinterpretAsUUID :::note -In addition to the UUID functions listed here, there is dedicated [UUID function documentation](/docs/en/sql-reference/functions/uuid-functions.md). +In addition to the UUID functions listed here, there is dedicated [UUID function documentation](../functions/uuid-functions.md). ::: -Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored. +Accepts a 16 byte string and returns a UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored. **Syntax** @@ -1032,11 +1686,11 @@ reinterpretAsUUID(fixed_string) **Arguments** -- `fixed_string` — Big-endian byte string. [FixedString](/docs/en/sql-reference/data-types/fixedstring.md/#fixedstring). +- `fixed_string` — Big-endian byte string. [FixedString](../data-types/fixedstring.md/#fixedstring). **Returned value** -- The UUID type value. [UUID](/docs/en/sql-reference/data-types/uuid.md/#uuid-data-type). +- The UUID type value. [UUID](../data-types/uuid.md/#uuid-data-type). **Examples** @@ -1089,7 +1743,7 @@ reinterpret(x, type) **Arguments** - `x` — Any type. -- `type` — Destination type. [String](/docs/en/sql-reference/data-types/string.md). +- `type` — Destination type. [String](../data-types/string.md). 
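As a hedged illustration of the generic form (it mirrors the fixed-type `reinterpretAs*` functions documented above; the expected outputs reflect my reading of that behaviour):

```sql
SELECT
    reinterpret(toInt8(-1), 'UInt8')  AS int_to_uint,   -- expected: 255 (same byte, unsigned view)
    reinterpret(toInt8(1), 'Float32') AS int_to_float;  -- byte pattern 0x01 read as a Float32 (a tiny denormal)
```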
**Returned value** @@ -1128,7 +1782,7 @@ x::t **Arguments** - `x` — A value to convert. May be of any type. -- `T` — The name of the target data type. [String](/docs/en/sql-reference/data-types/string.md). +- `T` — The name of the target data type. [String](../data-types/string.md). - `t` — The target data type. **Returned value** @@ -1177,9 +1831,9 @@ Result: └─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘ ``` -Conversion to [FixedString (N)](/docs/en/sql-reference/data-types/fixedstring.md) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Conversion to [FixedString (N)](../data-types/fixedstring.md) only works for arguments of type [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -Type conversion to [Nullable](/docs/en/sql-reference/data-types/nullable.md) and back is supported. +Type conversion to [Nullable](../data-types/nullable.md) and back is supported. **Example** @@ -1253,7 +1907,7 @@ Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in c ## accurateCastOrNull(x, T) -Converts input value `x` to the specified data type `T`. Always returns [Nullable](/docs/en/sql-reference/data-types/nullable.md) type and returns [NULL](/docs/en/sql-reference/syntax.md/#null-literal) if the casted value is not representable in the target type. +Converts input value `x` to the specified data type `T`. Always returns [Nullable](../data-types/nullable.md) type and returns [NULL](../syntax.md/#null-literal) if the casted value is not representable in the target type. **Syntax** @@ -1362,7 +2016,7 @@ Result: ## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) -Converts a Number type argument to an [Interval](/docs/en/sql-reference/data-types/special-data-types/interval.md) data type. +Converts a Number type argument to an [Interval](../data-types/special-data-types/interval.md) data type. **Syntax** @@ -1409,9 +2063,9 @@ Result: ## parseDateTime {#type_conversion_functions-parseDateTime} -Converts a [String](/docs/en/sql-reference/data-types/string.md) to [DateTime](/docs/en/sql-reference/data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format). +Converts a [String](../data-types/string.md) to [DateTime](../data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format). -This function is the opposite operation of function [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime). +This function is the opposite operation of function [formatDateTime](../functions/date-time-functions.md#date_time_functions-formatDateTime). **Syntax** @@ -1431,7 +2085,7 @@ Returns DateTime values parsed from input string according to a MySQL style form **Supported format specifiers** -All format specifiers listed in [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) except: +All format specifiers listed in [formatDateTime](../functions/date-time-functions.md#date_time_functions-formatDateTime) except: - %Q: Quarter (1-4) **Example** @@ -1460,7 +2114,7 @@ Alias: `str_to_date`. 
Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax. -This function is the opposite operation of function [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax). +This function is the opposite operation of function [formatDateTimeInJodaSyntax](../functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax). **Syntax** @@ -1480,7 +2134,7 @@ Returns DateTime values parsed from input string according to a Joda style forma **Supported format specifiers** -All format specifiers listed in [formatDateTimeInJoda](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) are supported, except: +All format specifiers listed in [formatDateTimeInJoda](../functions/date-time-functions.md#date_time_functions-formatDateTime) are supported, except: - S: fraction of second - z: time zone - Z: time zone offset/id @@ -1506,7 +2160,7 @@ Same as for [parseDateTimeInJodaSyntax](#type_conversion_functions-parseDateTime ## parseDateTimeBestEffort ## parseDateTime32BestEffort -Converts a date and time in the [String](/docs/en/sql-reference/data-types/string.md) representation to [DateTime](/docs/en/sql-reference/data-types/datetime.md/#data_type-datetime) data type. +Converts a date and time in the [String](../data-types/string.md) representation to [DateTime](../data-types/datetime.md/#data_type-datetime) data type. The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 1123 - 5.2.14 RFC-822 Date and Time Specification](https://tools.ietf.org/html/rfc1123#page-55), ClickHouse’s and some other date and time formats. @@ -1518,8 +2172,8 @@ parseDateTimeBestEffort(time_string [, time_zone]) **Arguments** -- `time_string` — String containing a date and time to convert. [String](/docs/en/sql-reference/data-types/string.md). -- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](/docs/en/sql-reference/data-types/string.md). +- `time_string` — String containing a date and time to convert. [String](../data-types/string.md). +- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../data-types/string.md). **Supported non-standard formats** @@ -1535,7 +2189,7 @@ If the year is not specified, it is considered to be equal to the current year. **Returned value** -- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. +- `time_string` converted to the [DateTime](../data-types/datetime.md) data type. **Examples** @@ -1667,7 +2321,7 @@ Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except ## parseDateTime64BestEffort -Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns [DateTime](/docs/en/sql-reference/functions/type-conversion-functions.md/#data_type-datetime) data type. +Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns [DateTime](../functions/type-conversion-functions.md/#data_type-datetime) data type. **Syntax** @@ -1677,13 +2331,13 @@ parseDateTime64BestEffort(time_string [, precision [, time_zone]]) **Arguments** -- `time_string` — String containing a date or date with time to convert. 
[String](/docs/en/sql-reference/data-types/string.md). -- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). +- `time_string` — String containing a date or date with time to convert. [String](../data-types/string.md). +- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](../data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md). **Returned value** -- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. +- `time_string` converted to the [DateTime](../data-types/datetime.md) data type. **Examples** @@ -1733,7 +2387,7 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that ## toLowCardinality -Converts input parameter to the [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) version of same data type. +Converts input parameter to the [LowCardinality](../data-types/lowcardinality.md) version of same data type. To convert data from the `LowCardinality` data type use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`. @@ -1745,13 +2399,11 @@ toLowCardinality(expr) **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) resulting in one of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `expr` — [Expression](../syntax.md/#syntax-expressions) resulting in one of the [supported data types](../data-types/index.md/#data_types). **Returned values** -- Result of `expr`. - -Type: `LowCardinality(expr_result_type)` +- Result of `expr`. [LowCardinality](../data-types/lowcardinality.md) of the type of `expr`. **Example** @@ -1979,143 +2631,3 @@ Result: │ 2,"good" │ └───────────────────────────────────────────┘ ``` - -## snowflakeToDateTime - -Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](/docs/en/sql-reference/data-types/datetime.md) format. - -**Syntax** - -``` sql -snowflakeToDateTime(value[, time_zone]) -``` - -**Arguments** - -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). - -**Returned value** - -- The timestamp component of `value` as a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value. 
- -**Example** - -Query: - -``` sql -SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC'); -``` - -Result: - -```response - -┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐ -│ 2021-08-15 10:57:56 │ -└──────────────────────────────────────────────────────────────────┘ -``` - -## snowflakeToDateTime64 - -Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) format. - -**Syntax** - -``` sql -snowflakeToDateTime64(value[, time_zone]) -``` - -**Arguments** - -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). - -**Returned value** - -- The timestamp component of `value` as a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) with scale = 3, i.e. millisecond precision. - -**Example** - -Query: - -``` sql -SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC'); -``` - -Result: - -```response - -┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐ -│ 2021-08-15 10:58:19.841 │ -└────────────────────────────────────────────────────────────────────┘ -``` - -## dateTimeToSnowflake - -Converts a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. - -**Syntax** - -``` sql -dateTimeToSnowflake(value) -``` - -**Arguments** - -- `value` — Date with time. [DateTime](/docs/en/sql-reference/data-types/datetime.md). - -**Returned value** - -- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. - -**Example** - -Query: - -``` sql -WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt); -``` - -Result: - -```response -┌─dateTimeToSnowflake(dt)─┐ -│ 1426860702823350272 │ -└─────────────────────────┘ -``` - -## dateTime64ToSnowflake - -Convert a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. - -**Syntax** - -``` sql -dateTime64ToSnowflake(value) -``` - -**Arguments** - -- `value` — Date with time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). - -**Returned value** - -- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. - -**Example** - -Query: - -``` sql -WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64); -``` - -Result: - -```response -┌─dateTime64ToSnowflake(dt64)─┐ -│ 1426860704886947840 │ -└─────────────────────────────┘ -``` diff --git a/docs/en/sql-reference/functions/ulid-functions.md b/docs/en/sql-reference/functions/ulid-functions.md index eb69b1779ae..dc6a803d638 100644 --- a/docs/en/sql-reference/functions/ulid-functions.md +++ b/docs/en/sql-reference/functions/ulid-functions.md @@ -18,7 +18,7 @@ generateULID([x]) **Arguments** -- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). 
The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. +- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. **Returned value** @@ -60,14 +60,12 @@ ULIDStringToDateTime(ulid[, timezone]) **Arguments** -- `ulid` — Input ULID. [String](/docs/en/sql-reference/data-types/string.md) or [FixedString(26)](/docs/en/sql-reference/data-types/fixedstring.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `ulid` — Input ULID. [String](../data-types/string.md) or [FixedString(26)](../data-types/fixedstring.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Timestamp with milliseconds precision. - -Type: [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). +- Timestamp with milliseconds precision. [DateTime64(3)](../data-types/datetime64.md). **Usage example** diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index cf2940d63e1..47890e0b271 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -16,7 +16,7 @@ If the relevant part isn’t present in a URL, an empty string is returned. Extracts the protocol from a URL. -Examples of typical returned values: http, https, ftp, mailto, tel, magnet… +Examples of typical returned values: http, https, ftp, mailto, tel, magnet... ### domain @@ -28,7 +28,7 @@ domain(url) **Arguments** -- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../data-types/string.md). The URL can be specified with or without a scheme. Examples: @@ -48,10 +48,7 @@ clickhouse.com **Returned values** -- Host name. If ClickHouse can parse the input string as a URL. -- Empty string. If ClickHouse can’t parse the input string as a URL. - -Type: `String`. +- Host name if ClickHouse can parse the input string as a URL, otherwise an empty string. [String](../data-types/string.md). **Example** @@ -79,7 +76,7 @@ topLevelDomain(url) **Arguments** -- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../data-types/string.md). The URL can be specified with or without a scheme. Examples: @@ -91,10 +88,7 @@ https://clickhouse.com/time/ **Returned values** -- Domain name. If ClickHouse can parse the input string as a URL. -- Empty string. If ClickHouse cannot parse the input string as a URL. - -Type: `String`. +- Domain name if ClickHouse can parse the input string as a URL. Otherwise, an empty string. [String](../data-types/string.md). **Example** @@ -157,14 +151,12 @@ cutToFirstSignificantSubdomainCustom(URL, TLD) **Arguments** -- `URL` — URL. 
[String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `URL` — URL. [String](../data-types/string.md). +- `TLD` — Custom TLD list name. [String](../data-types/string.md). **Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain. - -Type: [String](../../sql-reference/data-types/string.md). +- Part of the domain that includes top-level subdomains up to the first significant subdomain. [String](../data-types/string.md). **Example** @@ -211,14 +203,12 @@ cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) **Arguments** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `URL` — URL. [String](../data-types/string.md). +- `TLD` — Custom TLD list name. [String](../data-types/string.md). **Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. - -Type: [String](../../sql-reference/data-types/string.md). +- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. [String](../data-types/string.md). **Example** @@ -265,14 +255,12 @@ firstSignificantSubdomainCustom(URL, TLD) **Arguments** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `URL` — URL. [String](../data-types/string.md). +- `TLD` — Custom TLD list name. [String](../data-types/string.md). **Returned value** -- First significant subdomain. - -Type: [String](../../sql-reference/data-types/string.md). +- First significant subdomain. [String](../data-types/string.md). **Example** @@ -418,13 +406,11 @@ netloc(URL) **Arguments** -- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../data-types/string.md). **Returned value** -- `username:password@host:port`. - -Type: `String`. +- `username:password@host:port`. [String](../data-types/string.md). **Example** @@ -474,14 +460,12 @@ cutURLParameter(URL, name) **Arguments** -- `url` — URL. [String](../../sql-reference/data-types/string.md). -- `name` — name of URL parameter. [String](../../sql-reference/data-types/string.md) or [Array](../../sql-reference/data-types/array.md) of Strings. +- `url` — URL. [String](../data-types/string.md). +- `name` — name of URL parameter. [String](../data-types/string.md) or [Array](../data-types/array.md) of Strings. **Returned value** -- URL with `name` URL parameter removed. - -Type: `String`. +- URL with `name` URL parameter removed. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index d1b833c2439..2707f0bf8d4 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -18,7 +18,7 @@ generateUUIDv4([expr]) **Arguments** -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. 
+- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. **Returned value** @@ -90,7 +90,7 @@ generateUUIDv7([expr]) **Arguments** -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. +- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. **Returned value** @@ -163,7 +163,7 @@ generateUUIDv7ThreadMonotonic([expr]) **Arguments** -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. +- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. **Returned value** @@ -233,7 +233,7 @@ generateUUIDv7NonMonotonic([expr]) **Arguments** -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. +- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. **Returned value** @@ -289,9 +289,7 @@ The function also works for [Arrays](array-functions.md#function-empty) and [Str **Returned value** -- Returns `1` for an empty UUID or `0` for a non-empty UUID. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for an empty UUID or `0` for a non-empty UUID. [UInt8](../data-types/int-uint.md). **Example** @@ -331,9 +329,7 @@ The function also works for [Arrays](array-functions.md#function-notempty) or [S **Returned value** -- Returns `1` for a non-empty UUID or `0` for an empty UUID. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for a non-empty UUID or `0` for an empty UUID. [UInt8](../data-types/int-uint.md). **Example** @@ -383,8 +379,8 @@ Result: **Arguments** -- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#string). -- `default` — UUID to be used as the default if the first argument cannot be converted to a UUID type. [UUID](/docs/en/sql-reference/data-types/uuid.md). +- `string` — String of 36 characters or FixedString(36). 
[String](../syntax.md#string). +- `default` — UUID to be used as the default if the first argument cannot be converted to a UUID type. [UUID](../data-types/uuid.md). **Returned value** @@ -482,7 +478,7 @@ Result: ## UUIDStringToNum -Accepts `string` containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns a [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as its binary representation, with its format optionally specified by `variant` (`Big-endian` by default). +Accepts `string` containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns a [FixedString(16)](../data-types/fixedstring.md) as its binary representation, with its format optionally specified by `variant` (`Big-endian` by default). **Syntax** @@ -492,7 +488,7 @@ UUIDStringToNum(string[, variant = 1]) **Arguments** -- `string` — A [String](../../sql-reference/syntax.md#syntax-string-literal) of 36 characters or [FixedString](../../sql-reference/syntax.md#syntax-string-literal) +- `string` — A [String](../syntax.md#syntax-string-literal) of 36 characters or [FixedString](../syntax.md#syntax-string-literal) - `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. **Returned value** @@ -541,7 +537,7 @@ UUIDNumToString(binary[, variant = 1]) **Arguments** -- `binary` — [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as a binary representation of a UUID. +- `binary` — [FixedString(16)](../data-types/fixedstring.md) as a binary representation of a UUID. - `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. **Returned value** @@ -580,7 +576,7 @@ Result: ## UUIDToNum -Accepts a [UUID](../../sql-reference/data-types/uuid.md) and returns its binary representation as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md), with its format optionally specified by `variant` (`Big-endian` by default). This function replaces calls to two separate functions `UUIDStringToNum(toString(uuid))` so no intermediate conversion from UUID to string is required to extract bytes from a UUID. +Accepts a [UUID](../data-types/uuid.md) and returns its binary representation as a [FixedString(16)](../data-types/fixedstring.md), with its format optionally specified by `variant` (`Big-endian` by default). This function replaces calls to two separate functions `UUIDStringToNum(toString(uuid))` so no intermediate conversion from UUID to string is required to extract bytes from a UUID. **Syntax** @@ -640,13 +636,11 @@ UUIDv7ToDateTime(uuid[, timezone]) **Arguments** - `uuid` — [UUID](../data-types/uuid.md) of version 7. -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Timestamp with milliseconds precision. If the UUID is not a valid version 7 UUID, it returns 1970-01-01 00:00:00.000. - -Type: [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). +- Timestamp with milliseconds precision. 
If the UUID is not a valid version 7 UUID, it returns 1970-01-01 00:00:00.000. [DateTime64(3)](../data-types/datetime64.md). **Usage examples** @@ -674,7 +668,7 @@ Result: └──────────────────────────────────────────────────────────────────────────────────────┘ ``` -## serverUUID() +## serverUUID Returns the random UUID generated during the first start of the ClickHouse server. The UUID is stored in file `uuid` in the ClickHouse server directory (e.g. `/var/lib/clickhouse/`) and retained between server restarts. @@ -686,10 +680,277 @@ serverUUID() **Returned value** -- The UUID of the server. +- The UUID of the server. [UUID](../data-types/uuid.md). -Type: [UUID](../data-types/uuid.md). +## generateSnowflakeID + +Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID). + +The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. +For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. +In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. + +Function `generateSnowflakeID` guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries. + +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +|0| timestamp | +├─┼ ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| | machine_id | machine_seq_num | +└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ +``` + +**Syntax** + +``` sql +generateSnowflakeID([expr]) +``` + +**Arguments** + +- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional. + +**Returned value** + +A value of type UInt64. + +**Example** + +First, create a table with a column of type UInt64, then insert a generated Snowflake ID into the table. + +``` sql +CREATE TABLE tab (id UInt64) ENGINE = Memory; + +INSERT INTO tab SELECT generateSnowflakeID(); + +SELECT * FROM tab; +``` + +Result: + +```response +┌──────────────────id─┐ +│ 7199081390080409600 │ +└─────────────────────┘ +``` + +**Example with multiple Snowflake IDs generated per row** + +```sql +SELECT generateSnowflakeID(1), generateSnowflakeID(2); + +┌─generateSnowflakeID(1)─┬─generateSnowflakeID(2)─┐ +│ 7199081609652224000 │ 7199081609652224001 │ +└────────────────────────┴────────────────────────┘ +``` + +## generateSnowflakeIDThreadMonotonic + +Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID). + +The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. +For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. +In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. 
+ +This function behaves like `generateSnowflakeID` but gives no guarantee on counter monotony across different simultaneous requests. +Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs. + +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +|0| timestamp | +├─┼ ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| | machine_id | machine_seq_num | +└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ +``` + +**Syntax** + +``` sql +generateSnowflakeIDThreadMonotonic([expr]) +``` + +**Arguments** + +- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional. + +**Returned value** + +A value of type UInt64. + +**Example** + +First, create a table with a column of type UInt64, then insert a generated Snowflake ID into the table. + +``` sql +CREATE TABLE tab (id UInt64) ENGINE = Memory; + +INSERT INTO tab SELECT generateSnowflakeIDThreadMonotonic(); + +SELECT * FROM tab; +``` + +Result: + +```response +┌──────────────────id─┐ +│ 7199082832006627328 │ +└─────────────────────┘ +``` + +**Example with multiple Snowflake IDs generated per row** + +```sql +SELECT generateSnowflakeIDThreadMonotonic(1), generateSnowflakeIDThreadMonotonic(2); + +┌─generateSnowflakeIDThreadMonotonic(1)─┬─generateSnowflakeIDThreadMonotonic(2)─┐ +│ 7199082940311945216 │ 7199082940316139520 │ +└───────────────────────────────────────┴───────────────────────────────────────┘ +``` + +## snowflakeToDateTime + +Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](../data-types/datetime.md) format. + +**Syntax** + +``` sql +snowflakeToDateTime(value[, time_zone]) +``` + +**Arguments** + +- `value` — Snowflake ID. [Int64](../data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md). + +**Returned value** + +- The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value. + +**Example** + +Query: + +``` sql +SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC'); +``` + +Result: + +```response + +┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐ +│ 2021-08-15 10:57:56 │ +└──────────────────────────────────────────────────────────────────┘ +``` + +## snowflakeToDateTime64 + +Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](../data-types/datetime64.md) format. + +**Syntax** + +``` sql +snowflakeToDateTime64(value[, time_zone]) +``` + +**Arguments** + +- `value` — Snowflake ID. [Int64](../data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md). + +**Returned value** + +- The timestamp component of `value` as a [DateTime64](../data-types/datetime64.md) with scale = 3, i.e. 
millisecond precision. + +**Example** + +Query: + +``` sql +SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC'); +``` + +Result: + +```response + +┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐ +│ 2021-08-15 10:58:19.841 │ +└────────────────────────────────────────────────────────────────────┘ +``` + +## dateTimeToSnowflake + +Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. + +**Syntax** + +``` sql +dateTimeToSnowflake(value) +``` + +**Arguments** + +- `value` — Date with time. [DateTime](../data-types/datetime.md). + +**Returned value** + +- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. + +**Example** + +Query: + +``` sql +WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt); +``` + +Result: + +```response +┌─dateTimeToSnowflake(dt)─┐ +│ 1426860702823350272 │ +└─────────────────────────┘ +``` + +## dateTime64ToSnowflake + +Convert a [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. + +**Syntax** + +``` sql +dateTime64ToSnowflake(value) +``` + +**Arguments** + +- `value` — Date with time. [DateTime64](../data-types/datetime64.md). + +**Returned value** + +- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. + +**Example** + +Query: + +``` sql +WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64); +``` + +Result: + +```response +┌─dateTime64ToSnowflake(dt64)─┐ +│ 1426860704886947840 │ +└─────────────────────────────┘ +``` ## See also -- [dictGetUUID](../../sql-reference/functions/ext-dict-functions.md#ext_dict_functions-other) +- [dictGetUUID](../functions/ext-dict-functions.md#ext_dict_functions-other) diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index 043686889c4..03251f0b9af 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -432,13 +432,13 @@ regionIn(lhs, rhs\[, geobase\]) **Parameters** -- `lhs` — Lhs region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint). -- `rhs` — Rhs region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint). +- `lhs` — Lhs region ID from the geobase. [UInt32](../data-types/int-uint). +- `rhs` — Rhs region ID from the geobase. [UInt32](../data-types/int-uint). - `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional. **Returned value** -- 1, if it belongs. [UInt8](../../sql-reference/data-types/int-uint). +- 1, if it belongs. [UInt8](../data-types/int-uint). - 0, if it doesn't belong. **Implementation details** diff --git a/docs/en/sql-reference/statements/alter/comment.md b/docs/en/sql-reference/statements/alter/comment.md index f6fb179d969..320828f0de9 100644 --- a/docs/en/sql-reference/statements/alter/comment.md +++ b/docs/en/sql-reference/statements/alter/comment.md @@ -4,7 +4,7 @@ sidebar_position: 51 sidebar_label: COMMENT --- -# ALTER TABLE … MODIFY COMMENT +# ALTER TABLE ... MODIFY COMMENT Adds, modifies, or removes comment to the table, regardless if it was set before or not. 
Comment change is reflected in both [system.tables](../../../operations/system-tables/tables.md) and `SHOW CREATE TABLE` query. diff --git a/docs/en/sql-reference/statements/alter/delete.md b/docs/en/sql-reference/statements/alter/delete.md index b6f45b67d52..af56bec7a11 100644 --- a/docs/en/sql-reference/statements/alter/delete.md +++ b/docs/en/sql-reference/statements/alter/delete.md @@ -4,7 +4,7 @@ sidebar_position: 39 sidebar_label: DELETE --- -# ALTER TABLE … DELETE Statement +# ALTER TABLE ... DELETE Statement ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 7961315c193..3cfb99cff83 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -42,7 +42,7 @@ These `ALTER` statements modify entities related to role-based access control: ## Mutations -`ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE … DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE … UPDATE](/docs/en/sql-reference/statements/alter/update.md). They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that to produce new “mutated” versions of parts. +`ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE ... DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE ... UPDATE](/docs/en/sql-reference/statements/alter/update.md). They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that to produce new “mutated” versions of parts. For `*MergeTree` tables mutations execute by **rewriting whole data parts**. There is no atomicity - parts are substituted for mutated parts as soon as they are ready and a `SELECT` query that started executing during a mutation will see data from parts that have already been mutated along with data from parts that have not been mutated yet. diff --git a/docs/en/sql-reference/statements/alter/update.md b/docs/en/sql-reference/statements/alter/update.md index ab7d0ca7378..0b300e5849a 100644 --- a/docs/en/sql-reference/statements/alter/update.md +++ b/docs/en/sql-reference/statements/alter/update.md @@ -4,7 +4,7 @@ sidebar_position: 40 sidebar_label: UPDATE --- -# ALTER TABLE … UPDATE Statements +# ALTER TABLE ... UPDATE Statements ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] [IN PARTITION partition_id] WHERE filter_expr diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md index e063b27424e..83e8e9311b4 100644 --- a/docs/en/sql-reference/statements/alter/view.md +++ b/docs/en/sql-reference/statements/alter/view.md @@ -4,9 +4,9 @@ sidebar_position: 50 sidebar_label: VIEW --- -# ALTER TABLE … MODIFY QUERY Statement +# ALTER TABLE ... MODIFY QUERY Statement -You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process. +You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE ... 
MODIFY QUERY` statement without interrupting ingestion process. This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underlying storage table and it does not change the columns' definition of the materialized view, because of this the application of this command is very limited for materialized views are created without `TO [db.]name` clause. @@ -198,6 +198,6 @@ SELECT * FROM mv; `ALTER LIVE VIEW ... REFRESH` statement refreshes a [Live view](../create/view.md#live-view). See [Force Live View Refresh](../create/view.md#live-view-alter-refresh). -## ALTER TABLE … MODIFY REFRESH Statement +## ALTER TABLE ... MODIFY REFRESH Statement `ALTER TABLE ... MODIFY REFRESH` statement changes refresh parameters of a [Refreshable Materialized View](../create/view.md#refreshable-materialized-view). See [Changing Refresh Parameters](../create/view.md#changing-refresh-parameters). diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 073a3c0d246..b526c94e508 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -306,7 +306,7 @@ CREATE WINDOW VIEW test.wv TO test.dst WATERMARK=ASCENDING ALLOWED_LATENESS=INTE Note that elements emitted by a late firing should be treated as updated results of a previous computation. Instead of firing at the end of windows, the window view will fire immediately when the late event arrives. Thus, it will result in multiple outputs for the same window. Users need to take these duplicated results into account or deduplicate them. -You can modify `SELECT` query that was specified in the window view by using `ALTER TABLE … MODIFY QUERY` statement. The data structure resulting in a new `SELECT` query should be the same as the original `SELECT` query when with or without `TO [db.]name` clause. Note that the data in the current window will be lost because the intermediate state cannot be reused. +You can modify `SELECT` query that was specified in the window view by using `ALTER TABLE ... MODIFY QUERY` statement. The data structure resulting in a new `SELECT` query should be the same as the original `SELECT` query when with or without `TO [db.]name` clause. Note that the data in the current window will be lost because the intermediate state cannot be reused. ### Monitoring New Windows diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index a76692cf291..f3dadabd25f 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -73,7 +73,7 @@ Data can be passed to the INSERT in any [format](../../interfaces/formats.md#for INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set ``` -For example, the following query format is identical to the basic version of INSERT … VALUES: +For example, the following query format is identical to the basic version of INSERT ... VALUES: ``` sql INSERT INTO [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... 
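-- A concrete, illustrative equivalence (table `t` and its columns are assumptions, not taken from the docs):
-- INSERT INTO t (id, s) FORMAT Values (1, 'a'), (2, 'b')
-- inserts the same rows as:
-- INSERT INTO t (id, s) VALUES (1, 'a'), (2, 'b')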
diff --git a/docs/en/sql-reference/statements/select/limit.md b/docs/en/sql-reference/statements/select/limit.md index d61a5a44b58..58fdf988bf3 100644 --- a/docs/en/sql-reference/statements/select/limit.md +++ b/docs/en/sql-reference/statements/select/limit.md @@ -17,11 +17,11 @@ If there is no [ORDER BY](../../../sql-reference/statements/select/order-by.md) The number of rows in the result set can also depend on the [limit](../../../operations/settings/settings.md#limit) setting. ::: -## LIMIT … WITH TIES Modifier +## LIMIT ... WITH TIES Modifier When you set `WITH TIES` modifier for `LIMIT n[,m]` and specify `ORDER BY expr_list`, you will get in result first `n` or `n,m` rows and all rows with same `ORDER BY` fields values equal to row at position `n` for `LIMIT n` and `m` for `LIMIT n,m`. -This modifier also can be combined with [ORDER BY … WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill). +This modifier also can be combined with [ORDER BY ... WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill). For example, the following query diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index d6432a7b4f8..512a58d7cd9 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -283,7 +283,7 @@ In `MaterializedView`-engine tables the optimization works with views like `SELE ## ORDER BY Expr WITH FILL Modifier -This modifier also can be combined with [LIMIT … WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties). +This modifier also can be combined with [LIMIT ... WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties). `WITH FILL` modifier can be set after `ORDER BY expr` with optional `FROM expr`, `TO expr` and `STEP expr` parameters. All missed values of `expr` column will be filled sequentially and other columns will be filled as defaults. diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 3a63811add6..f66178afbb2 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -169,7 +169,7 @@ If your listing of files contains number ranges with leading zeros, use the cons **Example** -Query the total number of rows in files named `file000`, `file001`, … , `file999`: +Query the total number of rows in files named `file000`, `file001`, ... , `file999`: ``` sql SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32'); diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md index 80077ecdb33..b891d88df31 100644 --- a/docs/en/sql-reference/table-functions/gcs.md +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -130,7 +130,7 @@ FROM gcs('https://storage.googleapis.com/my-test-bucket-768/{some,another}_prefi If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: -Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, ... 
, `file-999.csv`: ``` sql SELECT count(*) diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 92f904b8841..d65615e7588 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -85,7 +85,7 @@ If your listing of files contains number ranges with leading zeros, use the cons **Example** -Query the data from files named `file000`, `file001`, … , `file999`: +Query the data from files named `file000`, `file001`, ... , `file999`: ``` sql SELECT count(*) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 38d77a98749..cbef80371a3 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -137,7 +137,7 @@ FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/ If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: -Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, ... , `file-999.csv`: ``` sql SELECT count(*) diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index 4aa2073d75b..a071d0fb00d 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -57,7 +57,7 @@ memcpy(&buf[place_value], &x, sizeof(x)); for (size_t i = 0; i < rows; i += storage.index_granularity) ``` -**7.** Вокруг бинарных операторов (`+`, `-`, `*`, `/`, `%`, …), а также тернарного оператора `?:` ставятся пробелы. +**7.** Вокруг бинарных операторов (`+`, `-`, `*`, `/`, `%`, ...), а также тернарного оператора `?:` ставятся пробелы. ``` cpp UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -86,7 +86,7 @@ dst.ClickGoodEvent = click.GoodEvent; При необходимости, оператор может быть перенесён на новую строку. В этом случае, перед ним увеличивается отступ. -**11.** Унарные операторы `--`, `++`, `*`, `&`, … не отделяются от аргумента пробелом. +**11.** Унарные операторы `--`, `++`, `*`, `&`, ... не отделяются от аргумента пробелом. **12.** После запятой ставится пробел, а перед — нет. Аналогично для точки с запятой внутри выражения `for`. @@ -115,7 +115,7 @@ public: **16.** Если на весь файл один `namespace` и кроме него ничего существенного нет, то отступ внутри `namespace` не нужен. -**17.** Если блок для выражения `if`, `for`, `while`, … состоит из одного `statement`, то фигурные скобки не обязательны. Вместо этого поместите `statement` на отдельную строку. Это правило справедливо и для вложенных `if`, `for`, `while`, … +**17.** Если блок для выражения `if`, `for`, `while`, ... состоит из одного `statement`, то фигурные скобки не обязательны. Вместо этого поместите `statement` на отдельную строку. Это правило справедливо и для вложенных `if`, `for`, `while`, ... Если внутренний `statement` содержит фигурные скобки или `else`, то внешний блок следует писать в фигурных скобках. @@ -266,7 +266,7 @@ void executeQuery( Пример взят с ресурса http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/. -**7.** Нельзя писать мусорные комментарии (автор, дата создания…) в начале каждого файла. +**7.** Нельзя писать мусорные комментарии (автор, дата создания...) в начале каждого файла. **8.** Однострочные комментарии начинаются с трёх слешей: `///` , многострочные с `/**`. 
Такие комментарии считаются «документирующими». diff --git a/docs/ru/engines/table-engines/integrations/hdfs.md b/docs/ru/engines/table-engines/integrations/hdfs.md index 72087b56652..cf43eef73e3 100644 --- a/docs/ru/engines/table-engines/integrations/hdfs.md +++ b/docs/ru/engines/table-engines/integrations/hdfs.md @@ -103,7 +103,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs **Example** -Создадим таблицу с именами `file000`, `file001`, … , `file999`: +Создадим таблицу с именами `file000`, `file001`, ... , `file999`: ``` sql CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV') diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 720aa589122..a1c69df4d0a 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -73,7 +73,7 @@ SELECT * FROM s3_engine_table LIMIT 2; **Пример подстановки 1** -Таблица содержит данные из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Таблица содержит данные из файлов с именами `file-000.csv`, `file-001.csv`, ... , `file-999.csv`: ``` sql CREATE TABLE big_table (name String, value UInt32) diff --git a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md index 46597c94370..c3203804211 100644 --- a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -66,7 +66,7 @@ WHERE table = 'visits' └───────────┴───────────────────┴────────┘ ``` -Столбец `partition` содержит имена всех партиций таблицы. Таблица `visits` из нашего примера содержит две партиции: `201901` и `201902`. Используйте значения из этого столбца в запросах [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md). +Столбец `partition` содержит имена всех партиций таблицы. Таблица `visits` из нашего примера содержит две партиции: `201901` и `201902`. Используйте значения из этого столбца в запросах [ALTER ... PARTITION](../../../sql-reference/statements/alter/partition.md). Столбец `name` содержит названия кусков партиций. Значения из этого столбца можно использовать в запросах [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition). diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index faa492d4d85..49ba229b1d5 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -771,7 +771,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' - В результате вставки (запрос `INSERT`). - В фоновых операциях слияний и [мутаций](../../../sql-reference/statements/alter/index.md#mutations). - При скачивании данных с другой реплики. -- В результате заморозки партиций [ALTER TABLE … FREEZE PARTITION](../../../engines/table-engines/mergetree-family/mergetree.md#alter_freeze-partition). +- В результате заморозки партиций [ALTER TABLE ... FREEZE PARTITION](../../../engines/table-engines/mergetree-family/mergetree.md#alter_freeze-partition). 
Во всех случаях, кроме мутаций и заморозки партиций, при записи куска выбирается том и диск в соответствии с указанной конфигурацией хранилища: @@ -781,7 +781,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' Мутации и запросы заморозки партиций в реализации используют [жесткие ссылки](https://ru.wikipedia.org/wiki/%D0%96%D1%91%D1%81%D1%82%D0%BA%D0%B0%D1%8F_%D1%81%D1%81%D1%8B%D0%BB%D0%BA%D0%B0). Жесткие ссылки между различными дисками не поддерживаются, поэтому в случае таких операций куски размещаются на тех же дисках, что и исходные. В фоне куски перемещаются между томами на основе информации о занятом месте (настройка `move_factor`) по порядку, в котором указаны тома в конфигурации. Данные никогда не перемещаются с последнего тома и на первый том. Следить за фоновыми перемещениями можно с помощью системных таблиц [system.part_log](../../../engines/table-engines/mergetree-family/mergetree.md#system_tables-part-log) (поле `type = MOVE_PART`) и [system.parts](../../../engines/table-engines/mergetree-family/mergetree.md#system_tables-parts) (поля `path` и `disk`). Также подробная информация о перемещениях доступна в логах сервера. -С помощью запроса [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../engines/table-engines/mergetree-family/mergetree.md#alter_move-partition) пользователь может принудительно перенести кусок или партицию с одного раздела на другой. При этом учитываются все ограничения, указанные для фоновых операций. Запрос самостоятельно инициирует процесс перемещения не дожидаясь фоновых операций. В случае недостатка места или неудовлетворения ограничениям пользователь получит сообщение об ошибке. +С помощью запроса [ALTER TABLE ... MOVE PART\|PARTITION ... TO VOLUME\|DISK ...](../../../engines/table-engines/mergetree-family/mergetree.md#alter_move-partition) пользователь может принудительно перенести кусок или партицию с одного раздела на другой. При этом учитываются все ограничения, указанные для фоновых операций. Запрос самостоятельно инициирует процесс перемещения не дожидаясь фоновых операций. В случае недостатка места или неудовлетворения ограничениям пользователь получит сообщение об ошибке. Перемещения данных не взаимодействуют с репликацией данных, поэтому на разных репликах одной и той же таблицы могут быть указаны разные политики хранения. diff --git a/docs/ru/engines/table-engines/special/external-data.md b/docs/ru/engines/table-engines/special/external-data.md index 881566e5f34..3d9737096f5 100644 --- a/docs/ru/engines/table-engines/special/external-data.md +++ b/docs/ru/engines/table-engines/special/external-data.md @@ -31,7 +31,7 @@ ClickHouse позволяет отправить на сервер данные, - **--format** - формат данных в файле. Если не указано - используется TabSeparated. Должен быть указан один из следующих параметров: -- **--types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, … +- **--types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, ... - **--structure** - структура таблицы, в форме `UserID UInt64`, `URL String`. Определяет имена и типы столбцов. Файлы, указанные в file, будут разобраны форматом, указанным в format, с использованием типов данных, указанных в types или structure. Таблица будет загружена на сервер, и доступна там в качестве временной таблицы с именем name. 
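A minimal sketch of how such a temporary table is then queried (the table name `passed_data` and the `--types=UInt64,String` layout are assumptions for illustration; as noted above, with `--types` the columns are named `_1`, `_2`, ...):

``` sql
-- Assuming the external file was attached as a temporary table named `passed_data`
-- with --types=UInt64,String, its columns are _1 (UInt64) and _2 (String).
SELECT count() FROM passed_data WHERE _1 > 100;
```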
diff --git a/docs/ru/faq/general/olap.md b/docs/ru/faq/general/olap.md index c9021f7c92e..bcfe9663381 100644 --- a/docs/ru/faq/general/olap.md +++ b/docs/ru/faq/general/olap.md @@ -9,13 +9,13 @@ sidebar_position: 100 [OLAP](https://ru.wikipedia.org/wiki/OLAP) (OnLine Analytical Processing) переводится как обработка данных в реальном времени. Это широкий термин, который можно рассмотреть с двух сторон: с технической и с точки зрения бизнеса. Для самого общего понимания можно просто прочитать его с конца: **Processing** - Обрабатываются некие исходные данные… + Обрабатываются некие исходные данные... **Analytical** -: … чтобы получить какие-то аналитические отчеты или новые знания… +: ... чтобы получить какие-то аналитические отчеты или новые знания... **OnLine** -: … в реальном времени, практически без задержек на обработку. +: ... в реальном времени, практически без задержек на обработку. ## OLAP с точки зрения бизнеса {#olap-from-the-business-perspective} diff --git a/docs/ru/getting-started/example-datasets/nyc-taxi.md b/docs/ru/getting-started/example-datasets/nyc-taxi.md index 12d0c18c3a1..a42033e7d41 100644 --- a/docs/ru/getting-started/example-datasets/nyc-taxi.md +++ b/docs/ru/getting-started/example-datasets/nyc-taxi.md @@ -196,7 +196,7 @@ real 75m56.214s (Импорт данных напрямую из Postgres также возможен с использованием `COPY ... TO PROGRAM`.) -К сожалению, все поля, связанные с погодой (precipitation…average_wind_speed) заполнены NULL. Из-за этого мы исключим их из финального набора данных. +К сожалению, все поля, связанные с погодой (precipitation...average_wind_speed) заполнены NULL. Из-за этого мы исключим их из финального набора данных. Для начала мы создадим таблицу на одном сервере. Позже мы сделаем таблицу распределенной. diff --git a/docs/ru/index.md b/docs/ru/index.md index 29f2bbe07fb..02be8912b94 100644 --- a/docs/ru/index.md +++ b/docs/ru/index.md @@ -12,10 +12,10 @@ ClickHouse — столбцовая система управления база | Строка | WatchID | JavaEnable | Title | GoodEvent | EventTime | |--------|-------------|------------|--------------------|-----------|---------------------| -| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | -| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | -| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | … | … | … | … | … | +| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | +| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | +| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | +| #N | ... | ... | ... | ... | ... | То есть, значения, относящиеся к одной строке, физически хранятся рядом. @@ -24,13 +24,13 @@ ClickHouse — столбцовая система управления база В столбцовых СУБД данные хранятся в таком порядке: -| Строка: | #0 | #1 | #2 | #N | +| Строка: | #0 | #1 | #2 | #N | |-------------|---------------------|---------------------|---------------------|-----| -| WatchID: | 89354350662 | 90329509958 | 89953706054 | … | -| JavaEnable: | 1 | 0 | 1 | … | -| Title: | Investor Relations | Contact us | Mission | … | -| GoodEvent: | 1 | 1 | 1 | … | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | +| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | +| JavaEnable: | 1 | 0 | 1 | ... | +| Title: | Investor Relations | Contact us | Mission | ... | +| GoodEvent: | 1 | 1 | 1 | ... | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | В примерах изображён только порядок расположения данных. 
То есть значения из разных столбцов хранятся отдельно, а данные одного столбца — вместе. diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index a9280de9c7b..4ed42b6fb22 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -119,6 +119,7 @@ Hello\nworld Hello\ world ``` +`\n\r` (CRLF) поддерживается с помощью настройки `input_format_tsv_crlf_end_of_line`. Второй вариант поддерживается, так как его использует MySQL при записи tab-separated дампа. diff --git a/docs/ru/operations/settings/query-complexity.md b/docs/ru/operations/settings/query-complexity.md index d1d38a587c6..e82a5a008eb 100644 --- a/docs/ru/operations/settings/query-complexity.md +++ b/docs/ru/operations/settings/query-complexity.md @@ -260,7 +260,7 @@ FORMAT Null; Ограничивает количество строк в хэш-таблице, используемой при соединении таблиц. -Параметр применяется к операциям [SELECT… JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). +Параметр применяется к операциям [SELECT... JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). Если запрос содержит несколько `JOIN`, то ClickHouse проверяет значение настройки для каждого промежуточного результата. @@ -277,7 +277,7 @@ FORMAT Null; Ограничивает размер (в байтах) хэш-таблицы, используемой при объединении таблиц. -Параметр применяется к операциям [SELECT… JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). +Параметр применяется к операциям [SELECT... JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). Если запрос содержит несколько `JOIN`, то ClickHouse проверяет значение настройки для каждого промежуточного результата. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 2b3607dcf08..3a70a0bac12 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1859,7 +1859,7 @@ SELECT * FROM test_table ## count_distinct_implementation {#settings-count_distinct_implementation} -Задаёт, какая из функций `uniq*` используется при выполнении конструкции [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count). +Задаёт, какая из функций `uniq*` используется при выполнении конструкции [COUNT(DISTINCT ...)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count). Возможные значения: diff --git a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md index 6463f6bd95d..e6a61d9b381 100644 --- a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md @@ -82,7 +82,7 @@ FROM В этом случае необходимо помнить, что границы корзин гистограммы не известны. -## sequenceMatch(pattern)(timestamp, cond1, cond2, …) {#function-sequencematch} +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch} Проверяет, содержит ли последовательность событий цепочку, которая соответствует указанному шаблону. 
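A minimal sketch of the call shape described above (the inline data set is an assumption for illustration; any table with an ordered timestamp column works the same way):

``` sql
-- Timestamps are ascending; condition 1 (number = 1) occurs before condition 2 (number = 2),
-- so the pattern '(?1)(?2)' matches and the function returns 1.
SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2) AS matched
FROM (SELECT toDateTime(number) AS time, number FROM numbers(5));
```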
@@ -172,7 +172,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM - [sequenceCount](#function-sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, …) {#function-sequencecount} +## sequenceCount(pattern)(time, cond1, cond2, ...) {#function-sequencecount} Вычисляет количество цепочек событий, соответствующих шаблону. Функция обнаруживает только непересекающиеся цепочки событий. Она начинает искать следующую цепочку только после того, как полностью совпала текущая цепочка событий. diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md index fed0f8b328b..a0a430f7a68 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md @@ -7,7 +7,7 @@ sidebar_position: 201 ## quantiles {#quantiles} -Синтаксис: `quantiles(level1, level2, …)(x)` +Синтаксис: `quantiles(level1, level2, ...)(x)` Все функции для вычисления квантилей имеют соответствующие функции для вычисления нескольких квантилей: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`, `quantilesBFloat16`. Эти функции вычисляют все квантили указанных уровней в один проход и возвращают массив с вычисленными значениями. diff --git a/docs/ru/sql-reference/data-types/aggregatefunction.md b/docs/ru/sql-reference/data-types/aggregatefunction.md index e42b467e4af..0481151c7e4 100644 --- a/docs/ru/sql-reference/data-types/aggregatefunction.md +++ b/docs/ru/sql-reference/data-types/aggregatefunction.md @@ -6,9 +6,9 @@ sidebar_label: AggregateFunction # AggregateFunction {#data-type-aggregatefunction} -Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(…), и быть записано в таблицу обычно посредством [материализованного представления](../../sql-reference/statements/create/view.md). Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`. +Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(...), и быть записано в таблицу обычно посредством [материализованного представления](../../sql-reference/statements/create/view.md). Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`. -`AggregateFunction(name, types_of_arguments…)` — параметрический тип данных. +`AggregateFunction(name, types_of_arguments...)` — параметрический тип данных. **Параметры** diff --git a/docs/ru/sql-reference/data-types/fixedstring.md b/docs/ru/sql-reference/data-types/fixedstring.md index d7a4e865903..56a5632f88d 100644 --- a/docs/ru/sql-reference/data-types/fixedstring.md +++ b/docs/ru/sql-reference/data-types/fixedstring.md @@ -21,8 +21,8 @@ sidebar_label: FixedString(N) Примеры значений, которые можно эффективно хранить в столбцах типа `FixedString`: - Двоичное представление IP-адреса (`FixedString(16)` для IPv6). -- Коды языков (ru_RU, en_US … ). -- Коды валют (USD, RUB … ). +- Коды языков (ru_RU, en_US ... ). 
+- Коды валют (USD, RUB ... ). - Двоичное представление хэшей (`FixedString(16)` для MD5, `FixedString(32)` для SHA256). Для хранения значений UUID используйте тип данных [UUID](uuid.md). diff --git a/docs/ru/sql-reference/data-types/nested-data-structures/nested.md b/docs/ru/sql-reference/data-types/nested-data-structures/nested.md index 4ec8333d563..8fd293a0415 100644 --- a/docs/ru/sql-reference/data-types/nested-data-structures/nested.md +++ b/docs/ru/sql-reference/data-types/nested-data-structures/nested.md @@ -3,7 +3,7 @@ slug: /ru/sql-reference/data-types/nested-data-structures/nested --- # Nested {#nested} -## Nested(Name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2} +## Nested(Name1 Type1, Name2 Type2, ...) {#nestedname1-type1-name2-type2} Вложенная структура данных - это как будто вложенная таблица. Параметры вложенной структуры данных - имена и типы столбцов, указываются так же, как у запроса CREATE. Каждой строке таблицы может соответствовать произвольное количество строк вложенной структуры данных. diff --git a/docs/ru/sql-reference/data-types/tuple.md b/docs/ru/sql-reference/data-types/tuple.md index 8953134d154..9d86c26c563 100644 --- a/docs/ru/sql-reference/data-types/tuple.md +++ b/docs/ru/sql-reference/data-types/tuple.md @@ -4,7 +4,7 @@ sidebar_position: 54 sidebar_label: Tuple(T1, T2, ...) --- -# Tuple(T1, T2, …) {#tuplet1-t2} +# Tuple(T1, T2, ...) {#tuplet1-t2} Кортеж из элементов любого [типа](index.md#data_types). Элементы кортежа могут быть одного или разных типов. diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 1f06bdf264a..825e3f06be2 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -161,7 +161,7 @@ SELECT range(5), range(1, 5), range(1, 5, 2); ``` -## array(x1, …), оператор \[x1, …\] {#arrayx1-operator-x1} +## array(x1, ...), оператор \[x1, ...\] {#arrayx1-operator-x1} Создаёт массив из аргументов функции. Аргументы должны быть константами и иметь типы, для которых есть наименьший общий тип. Должен быть передан хотя бы один аргумент, так как иначе непонятно, какого типа создавать массив. То есть, с помощью этой функции невозможно создать пустой массив (для этого используйте функции emptyArray\*, описанные выше). @@ -308,7 +308,7 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) Элементы, равные `NULL`, обрабатываются как обычные значения. -## arrayCount(\[func,\] arr1, …) {#array-count} +## arrayCount(\[func,\] arr1, ...) {#array-count} Возвращает количество элементов массива `arr`, для которых функция `func` возвращает не 0. Если `func` не указана - возвращает количество ненулевых элементов массива. @@ -335,7 +335,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL) ## arrayEnumerate(arr) {#array_functions-arrayenumerate} -Возвращает массив \[1, 2, 3, …, length(arr)\] +Возвращает массив \[1, 2, 3, ..., length(arr)\] Эта функция обычно используется совместно с ARRAY JOIN. Она позволяет, после применения ARRAY JOIN, посчитать что-либо только один раз для каждого массива. Пример: @@ -375,7 +375,7 @@ WHERE (CounterID = 160656) AND notEmpty(GoalsReached) Также эта функция может быть использована в функциях высшего порядка. Например, с её помощью можно достать индексы массива для элементов, удовлетворяющих некоторому условию. -## arrayEnumerateUniq(arr, …) {#arrayenumerateuniqarr} +## arrayEnumerateUniq(arr, ...) 
{#arrayenumerateuniqarr} Возвращает массив, такого же размера, как исходный, где для каждого элемента указано, какой он по счету среди элементов с таким же значением. Например: arrayEnumerateUniq(\[10, 20, 10, 30\]) = \[1, 1, 2, 1\]. @@ -597,7 +597,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res; Элементы массива равные `NULL` обрабатываются как обычные значения. -## arraySort(\[func,\] arr, …) {#array_functions-sort} +## arraySort(\[func,\] arr, ...) {#array_functions-sort} Возвращает массив `arr`, отсортированный в восходящем порядке. Если задана функция `func`, то порядок сортировки определяется результатом применения этой функции на элементы массива `arr`. Если `func` принимает несколько аргументов, то в функцию `arraySort` нужно передавать несколько массивов, которые будут соответствовать аргументам функции `func`. Подробные примеры рассмотрены в конце описания `arraySort`. @@ -698,11 +698,11 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; Для улучшения эффективности сортировки применяется [преобразование Шварца](https://ru.wikipedia.org/wiki/%D0%9F%D1%80%D0%B5%D0%BE%D0%B1%D1%80%D0%B0%D0%B7%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_%D0%A8%D0%B2%D0%B0%D1%80%D1%86%D0%B0). ::: -## arrayPartialSort(\[func,\] limit, arr, …) {#array_functions-sort} +## arrayPartialSort(\[func,\] limit, arr, ...) {#array_functions-sort} То же, что и `arraySort` с дополнительным аргументом `limit`, позволяющим частичную сортировку. Возвращает массив того же размера, как и исходный, в котором элементы `[1..limit]` отсортированы в возрастающем порядке. Остальные элементы `(limit..N]` остаются в неспецифицированном порядке. -## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort} +## arrayReverseSort(\[func,\] arr, ...) {#array_functions-reverse-sort} Возвращает массив `arr`, отсортированный в нисходящем порядке. Если указана функция `func`, то массив `arr` сначала сортируется в порядке, который определяется функцией `func`, а затем отсортированный массив переворачивается. Если функция `func` принимает несколько аргументов, то в функцию `arrayReverseSort` необходимо передавать несколько массивов, которые будут соответствовать аргументам функции `func`. Подробные примеры рассмотрены в конце описания функции `arrayReverseSort`. @@ -803,11 +803,11 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` -## arrayPartialReverseSort(\[func,\] limit, arr, …) {#array_functions-sort} +## arrayPartialReverseSort(\[func,\] limit, arr, ...) {#array_functions-sort} То же, что и `arrayReverseSort` с дополнительным аргументом `limit`, позволяющим частичную сортировку. Возвращает массив того же размера, как и исходный, в котором элементы `[1..limit]` отсортированы в убывающем порядке. Остальные элементы `(limit..N]` остаются в неспецифицированном порядке. -## arrayUniq(arr, …) {#array-functions-arrayuniq} +## arrayUniq(arr, ...) {#array-functions-arrayuniq} Если передан один аргумент, считает количество разных элементов в массиве. Если передано несколько аргументов, считает количество разных кортежей из элементов на соответствующих позициях в нескольких массивах. @@ -1174,7 +1174,7 @@ SELECT arrayZip(['a', 'b', 'c'], [5, 2, 1]); └──────────────────────────────────────┘ ``` -## arrayMap(func, arr1, …) {#array-map} +## arrayMap(func, arr1, ...) {#array-map} Возвращает массив, полученный на основе результатов применения функции `func` к каждому элементу массива `arr`. 
@@ -1204,7 +1204,7 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res; Функция `arrayMap` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayFilter(func, arr1, …) {#array-filter} +## arrayFilter(func, arr1, ...) {#array-filter} Возвращает массив, содержащий только те элементы массива `arr1`, для которых функция `func` возвращает не 0. @@ -1237,7 +1237,7 @@ SELECT Функция `arrayFilter` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayFill(func, arr1, …) {#array-fill} +## arrayFill(func, arr1, ...) {#array-fill} Перебирает `arr1` от первого элемента к последнему и заменяет `arr1[i]` на `arr1[i - 1]`, если `func` вернула 0. Первый элемент `arr1` остаётся неизменным. @@ -1255,7 +1255,7 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, Функция `arrayFill` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayReverseFill(func, arr1, …) {#array-reverse-fill} +## arrayReverseFill(func, arr1, ...) {#array-reverse-fill} Перебирает `arr1` от последнего элемента к первому и заменяет `arr1[i]` на `arr1[i + 1]`, если `func` вернула 0. Последний элемент `arr1` остаётся неизменным. @@ -1273,7 +1273,7 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, Функция `arrayReverseFill` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arraySplit(func, arr1, …) {#array-split} +## arraySplit(func, arr1, ...) {#array-split} Разделяет массив `arr1` на несколько. Если `func` возвращает не 0, то массив разделяется, а элемент помещается в левую часть. Массив не разбивается по первому элементу. @@ -1291,7 +1291,7 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Функция `arraySplit` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayReverseSplit(func, arr1, …) {#array-reverse-split} +## arrayReverseSplit(func, arr1, ...) {#array-reverse-split} Разделяет массив `arr1` на несколько. Если `func` возвращает не 0, то массив разделяется, а элемент помещается в правую часть. Массив не разбивается по последнему элементу. @@ -1309,25 +1309,25 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Функция `arrayReverseSplit` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} +## arrayExists(\[func,\] arr1, ...) {#arrayexistsfunc-arr1} Возвращает 1, если существует хотя бы один элемент массива `arr`, для которого функция func возвращает не 0. Иначе возвращает 0. 
Функция `arrayExists` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. -## arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} +## arrayAll(\[func,\] arr1, ...) {#arrayallfunc-arr1} Возвращает 1, если для всех элементов массива `arr`, функция `func` возвращает не 0. Иначе возвращает 0. Функция `arrayAll` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. -## arrayFirst(func, arr1, …) {#array-first} +## arrayFirst(func, arr1, ...) {#array-first} Возвращает первый элемент массива `arr1`, для которого функция func возвращает не 0. Функция `arrayFirst` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayFirstIndex(func, arr1, …) {#array-first-index} +## arrayFirstIndex(func, arr1, ...) {#array-first-index} Возвращает индекс первого элемента массива `arr1`, для которого функция func возвращает не 0. @@ -1599,7 +1599,7 @@ SELECT arraySum(x -> x*x, [2, 3]) AS res; └─────┘ ``` -## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} +## arrayCumSum(\[func,\] arr1, ...) {#arraycumsumfunc-arr1} Возвращает массив из частичных сумм элементов исходного массива (сумма с накоплением). Если указана функция `func`, то значения элементов массива преобразуются этой функцией перед суммированием. diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 56ae4359bf1..bcc5f807c32 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -559,7 +559,7 @@ SELECT Описание режимов (mode): -| Mode | Первый день недели | Диапазон | Неделя 1 это первая неделя … | +| Mode | Первый день недели | Диапазон | Неделя 1 это первая неделя ... | | ----------- | -------- | -------- | ------------------ | |0|Воскресенье|0-53|с воскресеньем в этом году |1|Понедельник|0-53|с 4-мя или более днями в этом году diff --git a/docs/ru/sql-reference/functions/json-functions.md b/docs/ru/sql-reference/functions/json-functions.md index 123f40ce05d..18f625bf80f 100644 --- a/docs/ru/sql-reference/functions/json-functions.md +++ b/docs/ru/sql-reference/functions/json-functions.md @@ -88,7 +88,7 @@ SELECT isValidJSON('{"a": "hello", "b": [-100, 200.0, 300]}') = 1 SELECT isValidJSON('not a json') = 0 ``` -## JSONHas(json\[, indices_or_keys\]…) {#jsonhasjson-indices-or-keys} +## JSONHas(json\[, indices_or_keys\]...) {#jsonhasjson-indices-or-keys} Если значение существует в документе JSON, то возвращается `1`. @@ -121,7 +121,7 @@ SELECT JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'a' SELECT JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'hello' ``` -## JSONLength(json\[, indices_or_keys\]…) {#jsonlengthjson-indices-or-keys} +## JSONLength(json\[, indices_or_keys\]...) {#jsonlengthjson-indices-or-keys} Возвращает длину массива JSON или объекта JSON. @@ -134,7 +134,7 @@ SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3 SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 2 ``` -## JSONType(json\[, indices_or_keys\]…) {#jsontypejson-indices-or-keys} +## JSONType(json\[, indices_or_keys\]...) {#jsontypejson-indices-or-keys} Возвращает тип значения JSON. 
@@ -148,13 +148,13 @@ SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String' SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array' ``` -## JSONExtractUInt(json\[, indices_or_keys\]…) {#jsonextractuintjson-indices-or-keys} +## JSONExtractUInt(json\[, indices_or_keys\]...) {#jsonextractuintjson-indices-or-keys} -## JSONExtractInt(json\[, indices_or_keys\]…) {#jsonextractintjson-indices-or-keys} +## JSONExtractInt(json\[, indices_or_keys\]...) {#jsonextractintjson-indices-or-keys} -## JSONExtractFloat(json\[, indices_or_keys\]…) {#jsonextractfloatjson-indices-or-keys} +## JSONExtractFloat(json\[, indices_or_keys\]...) {#jsonextractfloatjson-indices-or-keys} -## JSONExtractBool(json\[, indices_or_keys\]…) {#jsonextractbooljson-indices-or-keys} +## JSONExtractBool(json\[, indices_or_keys\]...) {#jsonextractbooljson-indices-or-keys} Парсит JSON и извлекает значение. Эти функции аналогичны функциям `visitParam`. @@ -168,7 +168,7 @@ SELECT JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200 SELECT JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300 ``` -## JSONExtractString(json\[, indices_or_keys\]…) {#jsonextractstringjson-indices-or-keys} +## JSONExtractString(json\[, indices_or_keys\]...) {#jsonextractstringjson-indices-or-keys} Парсит JSON и извлекает строку. Эта функция аналогична функции `visitParamExtractString`. @@ -186,7 +186,7 @@ SELECT JSONExtractString('{"abc":"\\u263"}', 'abc') = '' SELECT JSONExtractString('{"abc":"hello}', 'abc') = '' ``` -## JSONExtract(json\[, indices_or_keys…\], Return_type) {#jsonextractjson-indices-or-keys-return-type} +## JSONExtract(json\[, indices_or_keys...\], Return_type) {#jsonextractjson-indices-or-keys-return-type} Парсит JSON и извлекает значение с заданным типом данных. @@ -207,7 +207,7 @@ SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday' ``` -## JSONExtractKeysAndValues(json\[, indices_or_keys…\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} +## JSONExtractKeysAndValues(json\[, indices_or_keys...\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} Разбор пар ключ-значение из JSON, где значение имеет тип данных ClickHouse. @@ -255,7 +255,7 @@ text └────────────────────────────────────────────────────────────┘ ``` -## JSONExtractRaw(json\[, indices_or_keys\]…) {#jsonextractrawjson-indices-or-keys} +## JSONExtractRaw(json\[, indices_or_keys\]...) {#jsonextractrawjson-indices-or-keys} Возвращает часть JSON в виде строки, содержащей неразобранную подстроку. @@ -267,7 +267,7 @@ text SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = '[-100, 200.0, 300]'; ``` -## JSONExtractArrayRaw(json\[, indices_or_keys\]…) {#jsonextractarrayrawjson-indices-or-keys} +## JSONExtractArrayRaw(json\[, indices_or_keys\]...) {#jsonextractarrayrawjson-indices-or-keys} Возвращает массив из элементов JSON массива, каждый из которых представлен в виде строки с неразобранными подстроками из JSON. 
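As a quick illustration of the behaviour described above, reusing the sample JSON document from the surrounding hunks (the result shown is the documented one for this function):

``` sql
-- Each element of the JSON array "b" is returned as an unparsed string fragment.
SELECT JSONExtractArrayRaw('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') AS raw_elements;
-- ['-100','200.0','300']
```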
diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 835aed934d5..f7637cfa3f7 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -286,7 +286,7 @@ SELECT byteSize(NULL, 1, 0.3, ''); Превращает константу в полноценный столбец, содержащий только одно значение. В ClickHouse полноценные столбцы и константы представлены в памяти по-разному. Функции по-разному работают для аргументов-констант и обычных аргументов (выполняется разный код), хотя результат почти всегда должен быть одинаковым. Эта функция предназначена для отладки такого поведения. -## ignore(…) {#ignore} +## ignore(...) {#ignore} Принимает любые аргументы, в т.ч. `NULL`, всегда возвращает 0. При этом, аргумент всё равно вычисляется. Это может использоваться для бенчмарков. diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index eeb5752c626..fc258f7b4cf 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -358,7 +358,7 @@ SELECT repeat('abc', 10); Разворачивает последовательность кодовых точек Unicode, при допущении, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. Иначе — что-то делает (не кидает исключение). -## format(pattern, s0, s1, …) {#format} +## format(pattern, s0, s1, ...) {#format} Форматирует константный шаблон со строками, перечисленными в аргументах. `pattern` — упрощенная версия шаблона в языке Python. Шаблон содержит «заменяющие поля», которые окружены фигурными скобками `{}`. Всё, что не содержится в скобках, интерпретируется как обычный текст и просто копируется. Если нужно использовать символ фигурной скобки, можно экранировать двойной скобкой `{{ '{{' }}` или `{{ '}}' }}`. Имя полей могут быть числами (нумерация с нуля) или пустыми (тогда они интерпретируются как последовательные числа). diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index 4f9ae4428a4..53da9a6e791 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -311,19 +311,19 @@ Result: Смотрите `multiSearchAllPositions`. -## multiSearchFirstPosition(haystack, \[needle1, needle2, …, needlen\]) {#multisearchfirstpositionhaystack-needle1-needle2-needlen} +## multiSearchFirstPosition(haystack, \[needle1, needle2, ..., needlen\]) {#multisearchfirstpositionhaystack-needle1-needle2-needlen} Так же, как и `position`, только возвращает оффсет первого вхождения любого из needles. Для поиска без учета регистра и/или в кодировке UTF-8 используйте функции `multiSearchFirstPositionCaseInsensitive, multiSearchFirstPositionUTF8, multiSearchFirstPositionCaseInsensitiveUTF8`. -## multiSearchFirstIndex(haystack, \[needle1, needle2, …, needlen\]) {#multisearchfirstindexhaystack-needle1-needle2-needlen} +## multiSearchFirstIndex(haystack, \[needle1, needle2, ..., needlen\]) {#multisearchfirstindexhaystack-needle1-needle2-needlen} Возвращает индекс `i` (нумерация с единицы) первой найденной строки needlei в строке `haystack` и 0 иначе. Для поиска без учета регистра и/или в кодировке UTF-8 используйте функции `multiSearchFirstIndexCaseInsensitive, multiSearchFirstIndexUTF8, multiSearchFirstIndexCaseInsensitiveUTF8`. 
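A small sketch of the two multi-search variants just described; the haystack and needles are invented for illustration, and the results follow directly from the definitions (1-based offsets and indices, 0 when nothing is found):

``` sql
-- Offset of the first occurrence of any needle: 'World' starts at position 7.
SELECT multiSearchFirstPosition('Hello World', ['World', 'xyz']);  -- 7

-- 1-based index of the needle that was found: 'xyz' never matches, so 'World' wins with index 2.
SELECT multiSearchFirstIndex('Hello World', ['xyz', 'World']);     -- 2

-- 0 when none of the needles occur in the haystack.
SELECT multiSearchFirstIndex('Hello World', ['foo', 'bar']);       -- 0
```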
-## multiSearchAny(haystack, \[needle1, needle2, …, needlen\]) {#function-multisearchany} +## multiSearchAny(haystack, \[needle1, needle2, ..., needlen\]) {#function-multisearchany} Возвращает 1, если хотя бы одна подстрока needlei нашлась в строке `haystack` и 0 иначе. @@ -343,30 +343,30 @@ Result: Регулярное выражение работает со строкой как с набором байт. Регулярное выражение не может содержать нулевые байты. Для шаблонов на поиск подстроки в строке, лучше используйте LIKE или position, так как они работают существенно быстрее. -## multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchanyhaystack-pattern1-pattern2-patternn} +## multiMatchAny(haystack, \[pattern1, pattern2, ..., patternn\]) {#multimatchanyhaystack-pattern1-pattern2-patternn} То же, что и `match`, но возвращает ноль, если ни одно регулярное выражение не подошло и один, если хотя бы одно. Используется библиотека [hyperscan](https://github.com/intel/hyperscan) для соответствия регулярных выражений. Для шаблонов на поиск многих подстрок в строке, лучше используйте `multiSearchAny`, так как она работает существенно быстрее. :::note Примечание Длина любой строки из `haystack` должна быть меньше 232 байт, иначе бросается исключение. Это ограничение связано с ограничением hyperscan API. ::: -## multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn} +## multiMatchAnyIndex(haystack, \[pattern1, pattern2, ..., patternn\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn} То же, что и `multiMatchAny`, только возвращает любой индекс подходящего регулярного выражения. -## multiMatchAllIndices(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchallindiceshaystack-pattern1-pattern2-patternn} +## multiMatchAllIndices(haystack, \[pattern1, pattern2, ..., patternn\]) {#multimatchallindiceshaystack-pattern1-pattern2-patternn} То же, что и `multiMatchAny`, только возвращает массив всех индексов всех подходящих регулярных выражений в любом порядке. -## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} +## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, ..., patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} То же, что и `multiMatchAny`, но возвращает 1 если любой шаблон соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). Эта функция основана на экспериментальной библиотеке [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) и может быть медленной для некоторых частных случаев. Производительность зависит от значения редакционного расстояния и используемых шаблонов, но всегда медленнее по сравнению с non-fuzzy вариантами. -## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} +## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, ..., patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} То же, что и `multiFuzzyMatchAny`, только возвращает любой индекс подходящего регулярного выражения в пределах константного редакционного расстояния. 
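For the regular-expression variants above, a minimal sanity check could look like the following; the patterns are trivial regular expressions chosen for illustration, and the results follow from the descriptions (1, a single index, or an array of indices):

``` sql
-- At least one pattern matches, so the result is 1.
SELECT multiMatchAny('ClickHouse rocks', ['nothing', 'rocks$']);       -- 1

-- Index of a matching pattern; only the second one matches here.
SELECT multiMatchAnyIndex('ClickHouse rocks', ['nothing', 'rocks$']);  -- 2

-- Indices of all matching patterns, in arbitrary order.
SELECT multiMatchAllIndices('ClickHouse rocks', ['Click', 'rocks$']);  -- [1,2]
```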
-## multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchallindiceshaystack-distance-pattern1-pattern2-patternn} +## multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, ..., patternn\]) {#multifuzzymatchallindiceshaystack-distance-pattern1-pattern2-patternn} То же, что и `multiFuzzyMatchAny`, только возвращает массив всех индексов всех подходящих регулярных выражений в любом порядке в пределах константного редакционного расстояния. diff --git a/docs/ru/sql-reference/functions/tuple-functions.md b/docs/ru/sql-reference/functions/tuple-functions.md index c702e5d00b1..70ae44aa627 100644 --- a/docs/ru/sql-reference/functions/tuple-functions.md +++ b/docs/ru/sql-reference/functions/tuple-functions.md @@ -9,15 +9,15 @@ sidebar_label: Функции для работы с кортежами ## tuple {#tuple} Функция, позволяющая сгруппировать несколько столбцов. -Для столбцов, имеющих типы T1, T2, … возвращает кортеж типа Tuple(T1, T2, …), содержащий эти столбцы. Выполнение функции ничего не стоит. +Для столбцов, имеющих типы T1, T2, ... возвращает кортеж типа Tuple(T1, T2, ...), содержащий эти столбцы. Выполнение функции ничего не стоит. Кортежи обычно используются как промежуточное значение в качестве аргумента операторов IN, или для создания списка формальных параметров лямбда-функций. Кортежи не могут быть записаны в таблицу. -С помощью функции реализуется оператор `(x, y, …)`. +С помощью функции реализуется оператор `(x, y, ...)`. **Синтаксис** ``` sql -tuple(x, y, …) +tuple(x, y, ...) ``` ## tupleElement {#tupleelement} diff --git a/docs/ru/sql-reference/functions/url-functions.md b/docs/ru/sql-reference/functions/url-functions.md index 3c6e6151ef8..087891f4347 100644 --- a/docs/ru/sql-reference/functions/url-functions.md +++ b/docs/ru/sql-reference/functions/url-functions.md @@ -14,7 +14,7 @@ sidebar_label: "Функции для работы с URL" ### protocol {#protocol} -Возвращает протокол. Примеры: http, ftp, mailto, magnet… +Возвращает протокол. Примеры: http, ftp, mailto, magnet... ### domain {#domain} diff --git a/docs/ru/sql-reference/statements/alter/comment.md b/docs/ru/sql-reference/statements/alter/comment.md index 727af15d03e..f841c8540f3 100644 --- a/docs/ru/sql-reference/statements/alter/comment.md +++ b/docs/ru/sql-reference/statements/alter/comment.md @@ -4,7 +4,7 @@ sidebar_position: 51 sidebar_label: COMMENT --- -# ALTER TABLE … MODIFY COMMENT {#alter-modify-comment} +# ALTER TABLE ... MODIFY COMMENT {#alter-modify-comment} Добавляет, изменяет или удаляет комментарий к таблице, независимо от того, был ли он установлен раньше или нет. Изменение комментария отражается как в системной таблице [system.tables](../../../operations/system-tables/tables.md), так и в результате выполнения запроса `SHOW CREATE TABLE`. diff --git a/docs/ru/sql-reference/statements/alter/delete.md b/docs/ru/sql-reference/statements/alter/delete.md index dc968a17349..c91a79f5cdd 100644 --- a/docs/ru/sql-reference/statements/alter/delete.md +++ b/docs/ru/sql-reference/statements/alter/delete.md @@ -4,7 +4,7 @@ sidebar_position: 39 sidebar_label: DELETE --- -# ALTER TABLE … DELETE {#alter-mutations} +# ALTER TABLE ... 
DELETE {#alter-mutations} ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr diff --git a/docs/ru/sql-reference/statements/alter/index.md b/docs/ru/sql-reference/statements/alter/index.md index 07f5ff0a298..e8b8af39e11 100644 --- a/docs/ru/sql-reference/statements/alter/index.md +++ b/docs/ru/sql-reference/statements/alter/index.md @@ -46,7 +46,7 @@ ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|CLEAR|COMMENT|MODIFY COLUMN ### Мутации {#mutations} -Мутации - разновидность запроса ALTER, позволяющая изменять или удалять данные в таблице. В отличие от стандартных запросов [ALTER TABLE … DELETE](../../../sql-reference/statements/alter/delete.md) и [ALTER TABLE … UPDATE](../../../sql-reference/statements/alter/update.md), рассчитанных на точечное изменение данных, область применения мутаций - достаточно тяжёлые изменения, затрагивающие много строк в таблице. Поддержана для движков таблиц семейства [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md), в том числе для движков с репликацией. +Мутации - разновидность запроса ALTER, позволяющая изменять или удалять данные в таблице. В отличие от стандартных запросов [ALTER TABLE ... DELETE](../../../sql-reference/statements/alter/delete.md) и [ALTER TABLE ... UPDATE](../../../sql-reference/statements/alter/update.md), рассчитанных на точечное изменение данных, область применения мутаций - достаточно тяжёлые изменения, затрагивающие много строк в таблице. Поддержана для движков таблиц семейства [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md), в том числе для движков с репликацией. Конвертировать существующие таблицы для работы с мутациями не нужно. Но после применения первой мутации формат данных таблицы становится несовместимым с предыдущими версиями и откатиться на предыдущую версию уже не получится. diff --git a/docs/ru/sql-reference/statements/alter/update.md b/docs/ru/sql-reference/statements/alter/update.md index b2032ac77d1..01574a8a9b7 100644 --- a/docs/ru/sql-reference/statements/alter/update.md +++ b/docs/ru/sql-reference/statements/alter/update.md @@ -4,7 +4,7 @@ sidebar_position: 40 sidebar_label: UPDATE --- -# ALTER TABLE … UPDATE {#alter-table-update-statements} +# ALTER TABLE ... UPDATE {#alter-table-update-statements} ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] WHERE filter_expr diff --git a/docs/ru/sql-reference/statements/alter/view.md b/docs/ru/sql-reference/statements/alter/view.md index e6f6730ff99..53e295f6bbe 100644 --- a/docs/ru/sql-reference/statements/alter/view.md +++ b/docs/ru/sql-reference/statements/alter/view.md @@ -4,9 +4,9 @@ sidebar_position: 50 sidebar_label: VIEW --- -# Выражение ALTER TABLE … MODIFY QUERY {#alter-modify-query} +# Выражение ALTER TABLE ... MODIFY QUERY {#alter-modify-query} -Вы можете изменить запрос `SELECT`, который был задан при создании [материализованного представления](../create/view.md#materialized), с помощью запроса 'ALTER TABLE … MODIFY QUERY'. Используйте его если при создании материализованного представления не использовалась секция `TO [db.]name`. Настройка `allow_experimental_alter_materialized_view_structure` должна быть включена. +Вы можете изменить запрос `SELECT`, который был задан при создании [материализованного представления](../create/view.md#materialized), с помощью запроса 'ALTER TABLE ... MODIFY QUERY'. Используйте его если при создании материализованного представления не использовалась секция `TO [db.]name`. 
Настройка `allow_experimental_alter_materialized_view_structure` должна быть включена. Если при создании материализованного представления использовалась конструкция `TO [db.]name`, то для изменения отсоедините представление с помощью [DETACH](../detach.md), измените таблицу с помощью [ALTER TABLE](index.md), а затем снова присоедините запрос с помощью [ATTACH](../attach.md). diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index 032bdc6e6d4..8fa30446bb3 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -60,7 +60,7 @@ AS SELECT ... Если указано `POPULATE`, то при создании представления в него будут добавлены данные, уже содержащиеся в исходной таблице, как если бы был сделан запрос `CREATE TABLE ... AS SELECT ...` . Если `POPULATE` не указано, представление будет содержать только данные, добавленные в таблицу после создания представления. Использовать `POPULATE` не рекомендуется, так как в представление не попадут данные, добавляемые в таблицу во время создания представления. -Запрос `SELECT` может содержать `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Следует иметь ввиду, что соответствующие преобразования будут выполняться независимо, на каждый блок вставляемых данных. Например, при наличии `GROUP BY`, данные будут агрегироваться при вставке, но только в рамках одной пачки вставляемых данных. Далее, данные не будут доагрегированы. Исключение - использование ENGINE, производящего агрегацию данных самостоятельно, например, `SummingMergeTree`. +Запрос `SELECT` может содержать `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`... Следует иметь ввиду, что соответствующие преобразования будут выполняться независимо, на каждый блок вставляемых данных. Например, при наличии `GROUP BY`, данные будут агрегироваться при вставке, но только в рамках одной пачки вставляемых данных. Далее, данные не будут доагрегированы. Исключение - использование ENGINE, производящего агрегацию данных самостоятельно, например, `SummingMergeTree`. Выполнение запросов [ALTER](../../../sql-reference/statements/alter/view.md) над материализованными представлениями имеет свои особенности, поэтому эти запросы могут быть неудобными для использования. Если материализованное представление использует конструкцию `TO [db.]name`, то можно выполнить `DETACH` представления, `ALTER` для целевой таблицы и последующий `ATTACH` ранее отсоединенного (`DETACH`) представления. diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 747e36b8809..309d4852b11 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -73,7 +73,7 @@ INSERT INTO insert_select_testtable VALUES (1, DEFAULT, 1) ; INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set ``` -Например, следующий формат запроса идентичен базовому варианту INSERT … VALUES: +Например, следующий формат запроса идентичен базовому варианту INSERT ... VALUES: ``` sql INSERT INTO [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... 
diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md index 5331cf00728..546a674d41a 100644 --- a/docs/ru/sql-reference/table-functions/file.md +++ b/docs/ru/sql-reference/table-functions/file.md @@ -116,7 +116,7 @@ SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UIn **Пример** -Запрос данных из файлов с именами `file000`, `file001`, … , `file999`: +Запрос данных из файлов с именами `file000`, `file001`, ... , `file999`: ``` sql SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32'); diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index fe40cb0c507..2847a95bf19 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -108,7 +108,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. ::: -Подсчитаем общее количество строк в файлах с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Подсчитаем общее количество строк в файлах с именами `file-000.csv`, `file-001.csv`, ... , `file-999.csv`: ``` sql SELECT count(*) diff --git a/docs/zh/changelog/index.md b/docs/zh/changelog/index.md index 7afcc07c6fb..c91d8bcf4d1 100644 --- a/docs/zh/changelog/index.md +++ b/docs/zh/changelog/index.md @@ -190,7 +190,7 @@ sidebar_label: "\u53D8\u66F4\u65E5\u5FD7" - 如果在获取系统数据时发生了zookeeper异常。副本,将其显示在单独的列中。 这实现了 [#9137](https://github.com/ClickHouse/ClickHouse/issues/9137) [#9138](https://github.com/ClickHouse/ClickHouse/pull/9138) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 原子删除destroy上的MergeTree数据部分。 [#8402](https://github.com/ClickHouse/ClickHouse/pull/8402) ([Vladimir Chebotarev](https://github.com/excitoon)) - 支持分布式表的行级安全性。 [#8926](https://github.com/ClickHouse/ClickHouse/pull/8926) ([伊万](https://github.com/abyss7)) -- Now we recognize suffix (like KB, KiB…) in settings values. [#8072](https://github.com/ClickHouse/ClickHouse/pull/8072) ([米哈伊尔\*科罗托夫](https://github.com/millb)) +- Now we recognize suffix (like KB, KiB...) in settings values. [#8072](https://github.com/ClickHouse/ClickHouse/pull/8072) ([米哈伊尔\*科罗托夫](https://github.com/millb)) - 在构建大型连接的结果时防止内存不足。 [#8637](https://github.com/ClickHouse/ClickHouse/pull/8637) ([Artem Zuikov](https://github.com/4ertus2)) - 在交互模式下为建议添加群集名称 `clickhouse-client`. [#8709](https://github.com/ClickHouse/ClickHouse/pull/8709) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - Initialize query profiler for all threads in a group, e.g. it allows to fully profile insert-queries [#8820](https://github.com/ClickHouse/ClickHouse/pull/8820) ([伊万](https://github.com/abyss7)) @@ -523,7 +523,7 @@ sidebar_label: "\u53D8\u66F4\u65E5\u5FD7" - 现在后台在磁盘之间移动,运行它的seprate线程池。 [#7670](https://github.com/ClickHouse/ClickHouse/pull/7670) ([Vladimir Chebotarev](https://github.com/excitoon)) - `SYSTEM RELOAD DICTIONARY` 现在同步执行。 [#8240](https://github.com/ClickHouse/ClickHouse/pull/8240) ([维塔利\*巴拉诺夫](https://github.com/vitlibar)) - 堆栈跟踪现在显示物理地址(对象文件中的偏移量),而不是虚拟内存地址(加载对象文件的位置)。 这允许使用 `addr2line` 当二进制独立于位置并且ASLR处于活动状态时。 这修复 [#8360](https://github.com/ClickHouse/ClickHouse/issues/8360). [#8387](https://github.com/ClickHouse/ClickHouse/pull/8387) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) -- 支持行级安全筛选器的新语法: `…
`. 修复 [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779). [#8381](https://github.com/ClickHouse/ClickHouse/pull/8381) ([伊万](https://github.com/abyss7)) +- 支持行级安全筛选器的新语法: `...
`. 修复 [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779). [#8381](https://github.com/ClickHouse/ClickHouse/pull/8381) ([伊万](https://github.com/abyss7)) - 现在 `cityHash` 功能可以与工作 `Decimal` 和 `UUID` 类型。 修复 [#5184](https://github.com/ClickHouse/ClickHouse/issues/5184). [#7693](https://github.com/ClickHouse/ClickHouse/pull/7693) ([米哈伊尔\*科罗托夫](https://github.com/millb)) - 从系统日志中删除了固定的索引粒度(它是1024),因为它在实现自适应粒度之后已经过时。 [#7698](https://github.com/ClickHouse/ClickHouse/pull/7698) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 当ClickHouse在没有SSL的情况下编译时,启用MySQL兼容服务器。 [#7852](https://github.com/ClickHouse/ClickHouse/pull/7852) ([尤里\*巴拉诺夫](https://github.com/yurriy)) diff --git a/docs/zh/development/style.md b/docs/zh/development/style.md index c0a08291e02..724b22ad461 100644 --- a/docs/zh/development/style.md +++ b/docs/zh/development/style.md @@ -53,7 +53,7 @@ memcpy(&buf[place_value], &x, sizeof(x)); for (size_t i = 0; i < rows; i += storage.index_granularity) ``` -**7.** 在二元运算符(`+`,`-`,`*`,`/`,`%`,…)和三元运算符 `?:` 周围添加空格。 +**7.** 在二元运算符(`+`,`-`,`*`,`/`,`%`,...)和三元运算符 `?:` 周围添加空格。 ``` cpp UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -82,7 +82,7 @@ dst.ClickGoodEvent = click.GoodEvent; 如有必要,运算符可以包裹到下一行。 在这种情况下,它前面的偏移量增加。 -**11.** 不要使用空格来分开一元运算符 (`--`, `++`, `*`, `&`, …) 和参数。 +**11.** 不要使用空格来分开一元运算符 (`--`, `++`, `*`, `&`, ...) 和参数。 **12.** 在逗号后面加一个空格,而不是在之前。同样的规则也适合 `for` 循环中的分号。 @@ -111,7 +111,7 @@ public: **16.** 如果对整个文件使用相同的 `namespace`,并且没有其他重要的东西,则 `namespace` 中不需要偏移量。 -**17.** 在 `if`, `for`, `while` 中包裹的代码块中,若代码是一个单行的 `statement`,那么大括号是可选的。 可以将 `statement` 放到一行中。这个规则同样适用于嵌套的 `if`, `for`, `while`, … +**17.** 在 `if`, `for`, `while` 中包裹的代码块中,若代码是一个单行的 `statement`,那么大括号是可选的。 可以将 `statement` 放到一行中。这个规则同样适用于嵌套的 `if`, `for`, `while`, ... 但是如果内部 `statement` 包含大括号或 `else`,则外部块应该用大括号括起来。 @@ -262,7 +262,7 @@ void executeQuery( 这个示例来源于 http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/。 -**7.** 不要在每个文件的开头写入垃圾注释(作者,创建日期…)。 +**7.** 不要在每个文件的开头写入垃圾注释(作者,创建日期...)。 **8.** 单行注释用三个斜杆: `///` ,多行注释以 `/**`开始。 这些注释会当做文档。 diff --git a/docs/zh/engines/table-engines/integrations/hdfs.md b/docs/zh/engines/table-engines/integrations/hdfs.md index 55648afe407..be673b6ce92 100644 --- a/docs/zh/engines/table-engines/integrations/hdfs.md +++ b/docs/zh/engines/table-engines/integrations/hdfs.md @@ -103,7 +103,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs **示例** -创建具有名为文件的表 `file000`, `file001`, … , `file999`: +创建具有名为文件的表 `file000`, `file001`, ... , `file999`: ``` sql CREARE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV') diff --git a/docs/zh/engines/table-engines/integrations/s3.md b/docs/zh/engines/table-engines/integrations/s3.md index f2585decabf..f18814675c3 100644 --- a/docs/zh/engines/table-engines/integrations/s3.md +++ b/docs/zh/engines/table-engines/integrations/s3.md @@ -109,7 +109,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https: **示例** -使用文件`file-000.csv`, `file-001.csv`, … , `file-999.csv`来创建表: +使用文件`file-000.csv`, `file-001.csv`, ... , `file-999.csv`来创建表: ``` sql CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); @@ -202,7 +202,7 @@ ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_p !!! warning "Warning" 如果文件列表中包含有从0开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`. -4. 
从文件`file-000.csv`, `file-001.csv`, … , `file-999.csv`创建表: +4. 从文件`file-000.csv`, `file-001.csv`, ... , `file-999.csv`创建表: ``` sql CREATE TABLE big_table (name String, value UInt32) diff --git a/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md index 4fecf4e5669..e283a4c7510 100644 --- a/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -59,7 +59,7 @@ WHERE table = 'visits' └───────────┴────────────────┴────────┘ ``` -`partition` 列存储分区的名称。此示例中有两个分区:`201901` 和 `201902`。在 [ALTER … PARTITION](#alter_manipulations-with-partitions) 语句中你可以使用该列值来指定分区名称。 +`partition` 列存储分区的名称。此示例中有两个分区:`201901` 和 `201902`。在 [ALTER ... PARTITION](#alter_manipulations-with-partitions) 语句中你可以使用该列值来指定分区名称。 `name` 列为分区中数据片段的名称。在 [ALTER ATTACH PART](#alter_attach-partition) 语句中你可以使用此列值中来指定片段名称。 diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index bfa69338657..67bd681269b 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -702,7 +702,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' - 插入(`INSERT`查询) - 后台合并和[数据变异](../../../sql-reference/statements/alter.md#alter-mutations) - 从另一个副本下载 -- [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区 +- [ALTER TABLE ... FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区 除了数据变异和冻结分区以外的情况下,数据按照以下逻辑存储到卷或磁盘上: @@ -713,7 +713,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' 在后台,数据片段基于剩余空间(`move_factor`参数)根据卷在配置文件中定义的顺序进行转移。数据永远不会从最后一个移出也不会从第一个移入。可以通过系统表 [system.part_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (字段 `type = MOVE_PART`) 和 [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (字段 `path` 和 `disk`) 来监控后台的移动情况。具体细节可以通过服务器日志查看。 -用户可以通过 [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition) 强制移动一个数据片段或分区到另外一个卷,所有后台移动的限制都会被考虑在内。这个查询会自行启动,无需等待后台操作完成。如果没有足够的可用空间或任何必须条件没有被满足,用户会收到报错信息。 +用户可以通过 [ALTER TABLE ... MOVE PART\|PARTITION ... TO VOLUME\|DISK ...](../../../sql-reference/statements/alter.md#alter_move-partition) 强制移动一个数据片段或分区到另外一个卷,所有后台移动的限制都会被考虑在内。这个查询会自行启动,无需等待后台操作完成。如果没有足够的可用空间或任何必须条件没有被满足,用户会收到报错信息。 数据移动不会妨碍到数据复制。也就是说,同一张表的不同副本可以指定不同的存储策略。 diff --git a/docs/zh/engines/table-engines/special/external-data.md b/docs/zh/engines/table-engines/special/external-data.md index 688e25402ab..06c6331b4f3 100644 --- a/docs/zh/engines/table-engines/special/external-data.md +++ b/docs/zh/engines/table-engines/special/external-data.md @@ -26,7 +26,7 @@ ClickHouse 允许向服务器发送处理查询所需的数据以及 SELECT 查 以下的参数是可选的:**–name** – 表的名称,如果省略,则采用 _data。 **–format** – 文件中的数据格式。 如果省略,则使用 TabSeparated。 -以下的参数必选一个:**–types** – 逗号分隔列类型的列表。例如:`UInt64,String`。列将被命名为 _1,_2,… +以下的参数必选一个:**–types** – 逗号分隔列类型的列表。例如:`UInt64,String`。列将被命名为 _1,_2,... 
**–structure**– 表结构的格式 `UserID UInt64`,`URL String`。定义列的名字以及类型。 在 «file» 中指定的文件将由 «format» 中指定的格式解析,使用在 «types» 或 «structure» 中指定的数据类型。该表将被上传到服务器,并在作为名称为 «name»临时表。 diff --git a/docs/zh/faq/general/olap.md b/docs/zh/faq/general/olap.md index b014419578b..c4b36b138fa 100644 --- a/docs/zh/faq/general/olap.md +++ b/docs/zh/faq/general/olap.md @@ -10,13 +10,13 @@ sidebar_position: 100 [OLAP](https://en.wikipedia.org/wiki/Online_analytical_processing) stands for Online Analytical Processing. It is a broad term that can be looked at from two perspectives: technical and business. But at the very high level, you can just read these words backward: Processing -: Some source data is processed… +: Some source data is processed... Analytical -: …to produce some analytical reports and insights… +: ...to produce some analytical reports and insights... Online -: …in real-time. +: ...in real-time. ## OLAP from the Business Perspective {#olap-from-the-business-perspective} diff --git a/docs/zh/getting-started/example-datasets/nyc-taxi.md b/docs/zh/getting-started/example-datasets/nyc-taxi.md index 9c487140df3..ceeb6fbb9e0 100644 --- a/docs/zh/getting-started/example-datasets/nyc-taxi.md +++ b/docs/zh/getting-started/example-datasets/nyc-taxi.md @@ -196,7 +196,7 @@ real 75m56.214s (也可以直接使用`COPY ... TO PROGRAM`从Postgres中导入数据) -数据中所有与天气相关的字段(precipitation……average_wind_speed)都填充了NULL。 所以,我们将从最终数据集中删除它们 +数据中所有与天气相关的字段(precipitation...average_wind_speed)都填充了NULL。 所以,我们将从最终数据集中删除它们 首先,我们使用单台服务器创建表,后面我们将在多台节点上创建这些表。 diff --git a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx index ecfdcddbbe2..7d4c299b919 100644 --- a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx +++ b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx @@ -212,7 +212,7 @@ ORDER BY year └──────┴─────────┴───────────────────────────────────────────────────────┘ ``` -2020 年房价出事了!但这并不令人意外…… +2020 年房价出事了!但这并不令人意外... ### 查询 3. 
最昂贵的社区 {#most-expensive-neighborhoods} diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index 758992e4084..975d5eb764c 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -371,7 +371,7 @@ UserID.bin,URL.bin,和EventTime.bin是UserID :::note - 最后一个索引条目(上图中的“mark 1082”)存储了上图中颗粒1082的主键列的最大值。 -- 索引条目(索引标记)不是基于表中的特定行,而是基于颗粒。例如,对于上图中的索引条目‘mark 0’,在我们的表中没有UserID为240.923且URL为“goal://metry=10000467796a411…”的行,相反,对于该表,有一个颗粒0,在该颗粒中,最小UserID值是240.923,最小URL值是“goal://metry=10000467796a411…”,这两个值来自不同的行。 +- 索引条目(索引标记)不是基于表中的特定行,而是基于颗粒。例如,对于上图中的索引条目‘mark 0’,在我们的表中没有UserID为240.923且URL为“goal://metry=10000467796a411...”的行,相反,对于该表,有一个颗粒0,在该颗粒中,最小UserID值是240.923,最小URL值是“goal://metry=10000467796a411...”,这两个值来自不同的行。 - 主索引文件完全加载到主内存中。如果文件大于可用的空闲内存空间,则ClickHouse将发生错误。 ::: diff --git a/docs/zh/index.md b/docs/zh/index.md index fab00dbcd1b..c092f296722 100644 --- a/docs/zh/index.md +++ b/docs/zh/index.md @@ -13,10 +13,10 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) | Row | WatchID | JavaEnable | Title | GoodEvent | EventTime | |-----|-------------|------------|--------------------|-----------|---------------------| -| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | -| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | -| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | … | … | … | … | … | +| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | +| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | +| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | +| #N | ... | ... | ... | ... | ... | 处于同一行中的数据总是被物理的存储在一起。 @@ -24,13 +24,13 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) 在列式数据库系统中,数据按如下的顺序存储: -| Row: | #0 | #1 | #2 | #N | +| Row: | #0 | #1 | #2 | #N | |-------------|---------------------|---------------------|---------------------|-----| -| WatchID: | 89354350662 | 90329509958 | 89953706054 | … | -| JavaEnable: | 1 | 0 | 1 | … | -| Title: | Investor Relations | Contact us | Mission | … | -| GoodEvent: | 1 | 1 | 1 | … | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | +| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | +| JavaEnable: | 1 | 0 | 1 | ... | +| Title: | Investor Relations | Contact us | Mission | ... | +| GoodEvent: | 1 | 1 | 1 | ... | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | 这些示例只显示了数据的排列顺序。来自不同列的值被单独存储,来自同一列的数据被存储在一起。 diff --git a/docs/zh/operations/settings/query-complexity.md b/docs/zh/operations/settings/query-complexity.md index 124d5fa5d1a..b1b5ca75018 100644 --- a/docs/zh/operations/settings/query-complexity.md +++ b/docs/zh/operations/settings/query-complexity.md @@ -196,7 +196,7 @@ Restrictions on the «maximum amount of something» can take the value 0, which Limits the number of rows in the hash table that is used when joining tables. -This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. +This settings applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. If a query contains multiple joins, ClickHouse checks this setting for every intermediate result. 
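The hunk above does not show this setting's name; judging from the description it is the row limit for the JOIN hash table, so the sketch below assumes the `max_rows_in_join` setting and uses made-up table names:

``` sql
-- Assumed setting name; caps the number of rows in the hash table built for the join.
SET max_rows_in_join = 1000000;

SELECT count()
FROM visits AS v
INNER JOIN users AS u ON v.user_id = u.id;  -- illustrative tables, not from the document
```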
@@ -213,7 +213,7 @@ Default value: 0. Limits the size in bytes of the hash table used when joining tables. -This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). +This settings applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). If the query contains joins, ClickHouse checks this setting for every intermediate result. diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index c3b4194ed44..5e59196f56c 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -1002,7 +1002,7 @@ ClickHouse生成异常 ## count_distinct_implementation {#settings-count_distinct_implementation} -指定其中的 `uniq*` 函数应用于执行 [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) 建筑。 +指定其中的 `uniq*` 函数应用于执行 [COUNT(DISTINCT ...)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) 建筑。 可能的值: diff --git a/docs/zh/operations/system-tables/dictionaries.md b/docs/zh/operations/system-tables/dictionaries.md index 0cf91e45e86..c7b1bdd04be 100644 --- a/docs/zh/operations/system-tables/dictionaries.md +++ b/docs/zh/operations/system-tables/dictionaries.md @@ -21,7 +21,7 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. - `origin` ([字符串](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary. - `type` ([字符串](../../sql-reference/data-types/string.md)) — Type of dictionary allocation. [在内存中存储字典](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). -- `key` — [密钥类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key):数字键 ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) or Сomposite key ([字符串](../../sql-reference/data-types/string.md)) — form “(type 1, type 2, …, type n)”. +- `key` — [密钥类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key):数字键 ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) or Сomposite key ([字符串](../../sql-reference/data-types/string.md)) — form “(type 1, type 2, ..., type n)”. - `attribute.names` ([阵列](../../sql-reference/data-types/array.md)([字符串](../../sql-reference/data-types/string.md))) — Array of [属性名称](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) 由字典提供。 - `attribute.types` ([阵列](../../sql-reference/data-types/array.md)([字符串](../../sql-reference/data-types/string.md))) — Corresponding array of [属性类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) 这是由字典提供。 - `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. 
diff --git a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md index cb1dcc35f5c..27d3375aebb 100644 --- a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md @@ -80,7 +80,7 @@ FROM 在这种情况下,您应该记住您不知道直方图bin边界。 -## sequenceMatch(pattern)(timestamp, cond1, cond2, …) {#function-sequencematch} +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch} 检查序列是否包含与模式匹配的事件链。 @@ -167,7 +167,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM - [sequenceCount](#function-sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, …) {#function-sequencecount} +## sequenceCount(pattern)(time, cond1, cond2, ...) {#function-sequencecount} 计算与模式匹配的事件链的数量。该函数搜索不重叠的事件链。当前链匹配后,它开始搜索下一个链。 diff --git a/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md b/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md index 4dce65af1ed..253eb9ef82d 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md @@ -7,7 +7,7 @@ sidebar_position: 201 **语法** ``` sql -quantiles(level1, level2, …)(x) +quantiles(level1, level2, ...)(x) ``` 所有分位数函数(quantile)也有相应的分位数(quantiles)函数: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`。 这些函数一次计算所列的级别的所有分位数, 并返回结果值的数组。 diff --git a/docs/zh/sql-reference/data-types/aggregatefunction.md b/docs/zh/sql-reference/data-types/aggregatefunction.md index e8f28b367a5..80648eb165b 100644 --- a/docs/zh/sql-reference/data-types/aggregatefunction.md +++ b/docs/zh/sql-reference/data-types/aggregatefunction.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/aggregatefunction --- -# AggregateFunction(name, types_of_arguments…) {#data-type-aggregatefunction} +# AggregateFunction(name, types_of_arguments...) {#data-type-aggregatefunction} 聚合函数的中间状态,可以通过聚合函数名称加`-State`后缀的形式得到它。与此同时,当您需要访问该类型的最终状态数据时,您需要以相同的聚合函数名加`-Merge`后缀的形式来得到最终状态数据。 diff --git a/docs/zh/sql-reference/data-types/domains/index.md b/docs/zh/sql-reference/data-types/domains/index.md index c123b10f6fe..9f12018732b 100644 --- a/docs/zh/sql-reference/data-types/domains/index.md +++ b/docs/zh/sql-reference/data-types/domains/index.md @@ -19,9 +19,9 @@ Domain类型是特定实现的类型,它总是与某个现存的基础类型 ### Domains的额外特性 {#domainsde-e-wai-te-xing} - 在执行SHOW CREATE TABLE 或 DESCRIBE TABLE时,其对应的列总是展示为Domain类型的名称 -- 在INSERT INTO domain_table(domain_column) VALUES(…)中输入数据总是以更人性化的格式进行输入 +- 在INSERT INTO domain_table(domain_column) VALUES(...)中输入数据总是以更人性化的格式进行输入 - 在SELECT domain_column FROM domain_table中数据总是以更人性化的格式输出 -- 在INSERT INTO domain_table FORMAT CSV …中,实现外部源数据以更人性化的格式载入 +- 在INSERT INTO domain_table FORMAT CSV ...中,实现外部源数据以更人性化的格式载入 ### Domains类型的限制 {#domainslei-xing-de-xian-zhi} diff --git a/docs/zh/sql-reference/data-types/fixedstring.md b/docs/zh/sql-reference/data-types/fixedstring.md index 633307938a9..d454e935fe7 100644 --- a/docs/zh/sql-reference/data-types/fixedstring.md +++ b/docs/zh/sql-reference/data-types/fixedstring.md @@ -18,8 +18,8 @@ slug: /zh/sql-reference/data-types/fixedstring 可以有效存储在`FixedString`类型的列中的值的示例: - 二进制表示的IP地址(IPv6使用`FixedString(16)`) -- 语言代码(ru_RU, en_US … ) -- 货币代码(USD, RUB … ) +- 语言代码(ru_RU, en_US ... ) +- 货币代码(USD, RUB ... 
) - 二进制表示的哈希值(MD5使用`FixedString(16)`,SHA256使用`FixedString(32)`) 请使用[UUID](uuid.md)数据类型来存储UUID值,。 diff --git a/docs/zh/sql-reference/data-types/nested-data-structures/nested.md b/docs/zh/sql-reference/data-types/nested-data-structures/nested.md index 5ef8256b483..57b30de0881 100644 --- a/docs/zh/sql-reference/data-types/nested-data-structures/nested.md +++ b/docs/zh/sql-reference/data-types/nested-data-structures/nested.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/nested-data-structures/nested --- -# Nested(Name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2} +# Nested(Name1 Type1, Name2 Type2, ...) {#nestedname1-type1-name2-type2} 嵌套数据结构类似于嵌套表。嵌套数据结构的参数(列名和类型)与 CREATE 查询类似。每个表可以包含任意多行嵌套数据结构。 diff --git a/docs/zh/sql-reference/data-types/simpleaggregatefunction.md b/docs/zh/sql-reference/data-types/simpleaggregatefunction.md index 601cb602a78..fbaa76365ec 100644 --- a/docs/zh/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/zh/sql-reference/data-types/simpleaggregatefunction.md @@ -3,7 +3,7 @@ slug: /zh/sql-reference/data-types/simpleaggregatefunction --- # SimpleAggregateFunction {#data-type-simpleaggregatefunction} -`SimpleAggregateFunction(name, types_of_arguments…)` 数据类型存储聚合函数的当前值, 并不像 [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) 那样存储其全部状态。这种优化可以应用于具有以下属性函数: 将函数 `f` 应用于行集合 `S1 UNION ALL S2` 的结果,可以通过将 `f` 分别应用于行集合的部分, 然后再将 `f` 应用于结果来获得: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`。 这个属性保证了部分聚合结果足以计算出合并的结果,所以我们不必存储和处理任何额外的数据。 +`SimpleAggregateFunction(name, types_of_arguments...)` 数据类型存储聚合函数的当前值, 并不像 [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) 那样存储其全部状态。这种优化可以应用于具有以下属性函数: 将函数 `f` 应用于行集合 `S1 UNION ALL S2` 的结果,可以通过将 `f` 分别应用于行集合的部分, 然后再将 `f` 应用于结果来获得: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`。 这个属性保证了部分聚合结果足以计算出合并的结果,所以我们不必存储和处理任何额外的数据。 支持以下聚合函数: diff --git a/docs/zh/sql-reference/data-types/tuple.md b/docs/zh/sql-reference/data-types/tuple.md index 004c80ff916..38813701c70 100644 --- a/docs/zh/sql-reference/data-types/tuple.md +++ b/docs/zh/sql-reference/data-types/tuple.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/tuple --- -# Tuple(T1, T2, …) {#tuplet1-t2} +# Tuple(T1, T2, ...) {#tuplet1-t2} 元组,其中每个元素都有单独的 [类型](index.md#data_types)。 diff --git a/docs/zh/sql-reference/functions/array-functions.md b/docs/zh/sql-reference/functions/array-functions.md index d150b94b8af..69db34e4a36 100644 --- a/docs/zh/sql-reference/functions/array-functions.md +++ b/docs/zh/sql-reference/functions/array-functions.md @@ -152,7 +152,7 @@ SELECT range(5), range(1, 5), range(1, 5, 2), range(-1, 5, 2); └─────────────┴─────────────┴────────────────┴─────────────────┘ ``` -## array(x1, …), operator \[x1, …\] {#arrayx1-operator-x1} +## array(x1, ...), operator \[x1, ...\] {#arrayx1-operator-x1} 使用函数的参数作为数组元素创建一个数组。 参数必须是常量,并且具有最小公共类型的类型。必须至少传递一个参数,否则将不清楚要创建哪种类型的数组。也就是说,你不能使用这个函数来创建一个空数组(为此,使用上面描述的’emptyArray  \*’函数)。 @@ -337,7 +337,7 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) 设置为«NULL»的元素将作为普通的元素值处理。 -## arrayCount(\[func,\] arr1, …) {#array-count} +## arrayCount(\[func,\] arr1, ...) 
{#array-count} `func`将arr数组作为参数,其返回结果为非零值的数量。如果未指定“func”,则返回数组中非零元素的数量。 @@ -363,7 +363,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL) ## arrayEnumerate(arr) {#array_functions-arrayenumerate} -返回 Array \[1, 2, 3, …, length (arr) \] +返回 Array \[1, 2, 3, ..., length (arr) \] 此功能通常与ARRAY JOIN一起使用。它允许在应用ARRAY JOIN后为每个数组计算一次。例如: @@ -403,7 +403,7 @@ WHERE (CounterID = 160656) AND notEmpty(GoalsReached) 此功能也可用于高阶函数。例如,您可以使用它来获取与条件匹配的元素的数组索引。 -## arrayEnumerateUniq(arr, …) {#arrayenumerateuniqarr} +## arrayEnumerateUniq(arr, ...) {#arrayenumerateuniqarr} 返回与源数组大小相同的数组,其中每个元素表示与其下标对应的源数组元素在源数组中出现的次数。 例如:arrayEnumerateUniq( \[10,20,10,30 \])=  \[1,1,2,1 \]。 @@ -621,7 +621,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res 设置为«NULL»的数组元素作为普通的数组元素值处理。 -## arraySort(\[func,\] arr, …) {#array_functions-reverse-sort} +## arraySort(\[func,\] arr, ...) {#array_functions-reverse-sort} 以升序对`arr`数组的元素进行排序。如果指定了`func`函数,则排序顺序由`func`函数的调用结果决定。如果`func`接受多个参数,那么`arraySort`函数也将解析与`func`函数参数相同数量的数组参数。更详细的示例在`arraySort`的末尾。 @@ -721,7 +721,7 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; !!! 注意 "注意" 为了提高排序效率, 使用了[施瓦茨变换](https://en.wikipedia.org/wiki/Schwartzian_transform)。 -## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort} +## arrayReverseSort(\[func,\] arr, ...) {#array_functions-reverse-sort} 以降序对`arr`数组的元素进行排序。如果指定了`func`函数,则排序顺序由`func`函数的调用结果决定。如果`func`接受多个参数,那么`arrayReverseSort`函数也将解析与`func`函数参数相同数量的数组作为参数。更详细的示例在`arrayReverseSort`的末尾。 @@ -822,7 +822,7 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` -## arrayUniq(arr, …) {#arrayuniqarr} +## arrayUniq(arr, ...) {#arrayuniqarr} 如果传递一个参数,则计算数组中不同元素的数量。 如果传递了多个参数,则它计算多个数组中相应位置的不同元素元组的数量。 @@ -1221,7 +1221,7 @@ select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); └───────────────────────────────────────────────┘ ``` -## arrayMap(func, arr1, …) {#array-map} +## arrayMap(func, arr1, ...) {#array-map} 将从 `func` 函数的原始应用中获得的数组返回给 `arr` 数组中的每个元素。 @@ -1251,7 +1251,7 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res 请注意,`arrayMap` 是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFilter(func, arr1, …) {#array-filter} +## arrayFilter(func, arr1, ...) {#array-filter} 返回一个仅包含 `arr1` 中的元素的数组,其中 `func` 返回的值不是 0。 @@ -1284,7 +1284,7 @@ SELECT 请注意,`arrayFilter`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFill(func, arr1, …) {#array-fill} +## arrayFill(func, arr1, ...) {#array-fill} 从第一个元素到最后一个元素扫描`arr1`,如果`func`返回0,则用`arr1[i - 1]`替换`arr1[i]`。`arr1`的第一个元素不会被替换。 @@ -1302,7 +1302,7 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, 请注意,`arrayFill` 是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayReverseFill(func, arr1, …) {#array-reverse-fill} +## arrayReverseFill(func, arr1, ...) {#array-reverse-fill} 从最后一个元素到第一个元素扫描`arr1`,如果`func`返回0,则用`arr1[i + 1]`替换`arr1[i]`。`arr1`的最后一个元素不会被替换。 @@ -1320,7 +1320,7 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 请注意,`arrayReverseFill`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arraySplit(func, arr1, …) {#array-split} +## arraySplit(func, arr1, ...) 
{#array-split} 将 `arr1` 拆分为多个数组。当 `func` 返回 0 以外的值时,数组将在元素的左侧拆分。数组不会在第一个元素之前被拆分。 @@ -1338,7 +1338,7 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res 请注意,`arraySplit`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayReverseSplit(func, arr1, …) {#array-reverse-split} +## arrayReverseSplit(func, arr1, ...) {#array-reverse-split} 将 `arr1` 拆分为多个数组。当 `func` 返回 0 以外的值时,数组将在元素的右侧拆分。数组不会在最后一个元素之后被拆分。 @@ -1356,37 +1356,37 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res 请注意,`arrayReverseSplit`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} +## arrayExists(\[func,\] arr1, ...) {#arrayexistsfunc-arr1} 如果 `arr` 中至少有一个元素 `func` 返回 0 以外的值,则返回 1。否则,它返回 0。 请注意,`arrayExists`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} +## arrayAll(\[func,\] arr1, ...) {#arrayallfunc-arr1} 如果 `func` 为 `arr` 中的所有元素返回 0 以外的值,则返回 1。否则,它返回 0。 请注意,`arrayAll`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFirst(func, arr1, …) {#array-first} +## arrayFirst(func, arr1, ...) {#array-first} 返回 `arr1` 数组中 `func` 返回非 0 的值的第一个元素。 请注意,`arrayFirst`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayLast(func, arr1, …) {#array-last} +## arrayLast(func, arr1, ...) {#array-last} 返回 `arr1` 数组中的最后一个元素,其中 `func` 返回的值不是 0。 请注意,`arrayLast`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFirstIndex(func, arr1, …) {#array-first-index} +## arrayFirstIndex(func, arr1, ...) {#array-first-index} 返回 `arr1` 数组中第一个元素的索引,其中 `func` 返回的值不是 0。 请注意,`arrayFirstIndex`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayLastIndex(func, arr1, …) {#array-last-index} +## arrayLastIndex(func, arr1, ...) {#array-last-index} 返回 `arr1` 数组中最后一个元素的索引,其中 `func` 返回的值不是 0。 @@ -1612,7 +1612,7 @@ SELECT arrayAvg(x -> (x * x), [2, 4]) AS res; └─────┘ ``` -## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} +## arrayCumSum(\[func,\] arr1, ...) {#arraycumsumfunc-arr1} 返回源数组中元素的部分和的数组(运行总和)。如果指定了 func 函数,则数组元素的值在求和之前由该函数转换。 diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index d6493ffe605..18b9f3495c0 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -443,7 +443,7 @@ SELECT toStartOfSecond(dt64, 'Asia/Istanbul'); `toISOWeek()`是一个兼容函数,等效于`toWeek(date,3)`。 下表描述了mode参数的工作方式。 -| Mode | First day of week | Range | Week 1 is the first week … | +| Mode | First day of week | Range | Week 1 is the first week ... 
| |------|-------------------|-------|-------------------------------| | 0 | Sunday | 0-53 | with a Sunday in this year | | 1 | Monday | 0-53 | with 4 or more days this year | diff --git a/docs/zh/sql-reference/functions/higher-order-functions.md b/docs/zh/sql-reference/functions/higher-order-functions.md index 929dc6f3ea7..0e08f88bba1 100644 --- a/docs/zh/sql-reference/functions/higher-order-functions.md +++ b/docs/zh/sql-reference/functions/higher-order-functions.md @@ -15,13 +15,13 @@ slug: /zh/sql-reference/functions/higher-order-functions 除了’arrayMap’和’arrayFilter’以外的所有其他函数,都可以省略第一个参数(lambda函数)。在这种情况下,默认返回数组元素本身。 -### arrayMap(func, arr1, …) {#higher_order_functions-array-map} +### arrayMap(func, arr1, ...) {#higher_order_functions-array-map} 将arr 将从’func’函数的原始应用程序获得的数组返回到’arr’数组中的每个元素。 返回从原始应用程序获得的数组 ‘func’ 函数中的每个元素 ‘arr’ 阵列。 -### arrayFilter(func, arr1, …) {#arrayfilterfunc-arr1} +### arrayFilter(func, arr1, ...) {#arrayfilterfunc-arr1} 返回一个仅包含以下元素的数组 ‘arr1’ 对于哪个 ‘func’ 返回0以外的内容。 @@ -48,31 +48,31 @@ SELECT │ [2] │ └─────┘ -### arrayCount(\[func,\] arr1, …) {#arraycountfunc-arr1} +### arrayCount(\[func,\] arr1, ...) {#arraycountfunc-arr1} 返回数组arr中非零元素的数量,如果指定了’func’,则通过’func’的返回值确定元素是否为非零元素。 -### arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} +### arrayExists(\[func,\] arr1, ...) {#arrayexistsfunc-arr1} 返回数组’arr’中是否存在非零元素,如果指定了’func’,则使用’func’的返回值确定元素是否为非零元素。 -### arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} +### arrayAll(\[func,\] arr1, ...) {#arrayallfunc-arr1} 返回数组’arr’中是否存在为零的元素,如果指定了’func’,则使用’func’的返回值确定元素是否为零元素。 -### arraySum(\[func,\] arr1, …) {#arraysumfunc-arr1} +### arraySum(\[func,\] arr1, ...) {#arraysumfunc-arr1} 计算arr数组的总和,如果指定了’func’,则通过’func’的返回值计算数组的总和。 -### arrayFirst(func, arr1, …) {#arrayfirstfunc-arr1} +### arrayFirst(func, arr1, ...) {#arrayfirstfunc-arr1} 返回数组中第一个匹配的元素,函数使用’func’匹配所有元素,直到找到第一个匹配的元素。 -### arrayFirstIndex(func, arr1, …) {#arrayfirstindexfunc-arr1} +### arrayFirstIndex(func, arr1, ...) {#arrayfirstindexfunc-arr1} 返回数组中第一个匹配的元素的下标索引,函数使用’func’匹配所有元素,直到找到第一个匹配的元素。 -### arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} +### arrayCumSum(\[func,\] arr1, ...) {#arraycumsumfunc-arr1} 返回源数组部分数据的总和,如果指定了`func`函数,则使用`func`的返回值计算总和。 @@ -98,7 +98,7 @@ SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res │ [1,2,0,1] │ └───────────┘ -### arraySort(\[func,\] arr1, …) {#arraysortfunc-arr1} +### arraySort(\[func,\] arr1, ...) {#arraysortfunc-arr1} 返回升序排序`arr1`的结果。如果指定了`func`函数,则排序顺序由`func`的结果决定。 @@ -124,7 +124,7 @@ SELECT arraySort([1, nan, 2, NULL, 3, nan, 4, NULL]) │ [1,2,3,4,nan,nan,NULL,NULL] │ └───────────────────────────────────────────────┘ -### arrayReverseSort(\[func,\] arr1, …) {#arrayreversesortfunc-arr1} +### arrayReverseSort(\[func,\] arr1, ...) {#arrayreversesortfunc-arr1} 返回降序排序`arr1`的结果。如果指定了`func`函数,则排序顺序由`func`的结果决定。 diff --git a/docs/zh/sql-reference/functions/in-functions.md b/docs/zh/sql-reference/functions/in-functions.md index 346e076310e..9858159a495 100644 --- a/docs/zh/sql-reference/functions/in-functions.md +++ b/docs/zh/sql-reference/functions/in-functions.md @@ -10,10 +10,10 @@ sidebar_label: IN 运算符 请参阅[IN 运算符](../../sql-reference/operators/in.md#select-in-operators)部分。 -## tuple(x, y, …), 运算符 (x, y, …) {#tuplex-y-operator-x-y} +## tuple(x, y, ...), 运算符 (x, y, ...) 
{#tuplex-y-operator-x-y} 函数用于对多个列进行分组。 -对于具有类型T1,T2,…的列,它返回包含这些列的元组(T1,T2,…)。 执行该函数没有任何成本。 +对于具有类型T1,T2,...的列,它返回包含这些列的元组(T1,T2,...)。 执行该函数没有任何成本。 元组通常用作IN运算符的中间参数值,或用于创建lambda函数的形参列表。 元组不能写入表。 ## tupleElement(tuple, n), 运算符 x.N {#tupleelementtuple-n-operator-x-n} diff --git a/docs/zh/sql-reference/functions/json-functions.md b/docs/zh/sql-reference/functions/json-functions.md index 52ec0ed1535..f07de564847 100644 --- a/docs/zh/sql-reference/functions/json-functions.md +++ b/docs/zh/sql-reference/functions/json-functions.md @@ -56,7 +56,7 @@ slug: /zh/sql-reference/functions/json-functions 以下函数基于[simdjson](https://github.com/lemire/simdjson),专为更复杂的JSON解析要求而设计。但上述假设2仍然适用。 -## JSONHas(json\[, indices_or_keys\]…) {#jsonhasjson-indices-or-keys} +## JSONHas(json\[, indices_or_keys\]...) {#jsonhasjson-indices-or-keys} 如果JSON中存在该值,则返回`1`。 @@ -83,7 +83,7 @@ slug: /zh/sql-reference/functions/json-functions select JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'a' select JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'hello' -## JSONLength(json\[, indices_or_keys\]…) {#jsonlengthjson-indices-or-keys} +## JSONLength(json\[, indices_or_keys\]...) {#jsonlengthjson-indices-or-keys} 返回JSON数组或JSON对象的长度。 @@ -94,7 +94,7 @@ slug: /zh/sql-reference/functions/json-functions select JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3 select JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 2 -## JSONType(json\[, indices_or_keys\]…) {#jsontypejson-indices-or-keys} +## JSONType(json\[, indices_or_keys\]...) {#jsontypejson-indices-or-keys} 返回JSON值的类型。 @@ -106,13 +106,13 @@ slug: /zh/sql-reference/functions/json-functions select JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String' select JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array' -## JSONExtractUInt(json\[, indices_or_keys\]…) {#jsonextractuintjson-indices-or-keys} +## JSONExtractUInt(json\[, indices_or_keys\]...) {#jsonextractuintjson-indices-or-keys} -## JSONExtractInt(json\[, indices_or_keys\]…) {#jsonextractintjson-indices-or-keys} +## JSONExtractInt(json\[, indices_or_keys\]...) {#jsonextractintjson-indices-or-keys} -## JSONExtractFloat(json\[, indices_or_keys\]…) {#jsonextractfloatjson-indices-or-keys} +## JSONExtractFloat(json\[, indices_or_keys\]...) {#jsonextractfloatjson-indices-or-keys} -## JSONExtractBool(json\[, indices_or_keys\]…) {#jsonextractbooljson-indices-or-keys} +## JSONExtractBool(json\[, indices_or_keys\]...) {#jsonextractbooljson-indices-or-keys} 解析JSON并提取值。这些函数类似于`visitParam*`函数。 @@ -124,7 +124,7 @@ slug: /zh/sql-reference/functions/json-functions select JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200.0 select JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300 -## JSONExtractString(json\[, indices_or_keys\]…) {#jsonextractstringjson-indices-or-keys} +## JSONExtractString(json\[, indices_or_keys\]...) 
{#jsonextractstringjson-indices-or-keys} 解析JSON并提取字符串。此函数类似于`visitParamExtractString`函数。 @@ -140,11 +140,11 @@ slug: /zh/sql-reference/functions/json-functions select JSONExtractString('{"abc":"\\u263"}', 'abc') = '' select JSONExtractString('{"abc":"hello}', 'abc') = '' -## JSONExtract(json\[, indices_or_keys…\], Return_type) {#jsonextractjson-indices-or-keys-return-type} +## JSONExtract(json\[, indices_or_keys...\], Return_type) {#jsonextractjson-indices-or-keys-return-type} 解析JSON并提取给定ClickHouse数据类型的值。 -这是以前的`JSONExtract函数的变体。 这意味着`JSONExtract(…, ‘String’)`返回与`JSONExtractString()`返回完全相同。`JSONExtract(…, ‘Float64’)`返回于`JSONExtractFloat()\`返回完全相同。 +这是以前的`JSONExtract函数的变体。 这意味着`JSONExtract(..., ‘String’)`返回与`JSONExtractString()`返回完全相同。`JSONExtract(..., ‘Float64’)`返回于`JSONExtractFloat()\`返回完全相同。 示例: @@ -156,7 +156,7 @@ slug: /zh/sql-reference/functions/json-functions SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Thursday' SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday' -## JSONExtractKeysAndValues(json\[, indices_or_keys…\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} +## JSONExtractKeysAndValues(json\[, indices_or_keys...\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} 从JSON中解析键值对,其中值是给定的ClickHouse数据类型。 @@ -164,7 +164,7 @@ slug: /zh/sql-reference/functions/json-functions SELECT JSONExtractKeysAndValues('{"x": {"a": 5, "b": 7, "c": 11}}', 'x', 'Int8') = [('a',5),('b',7),('c',11)]; -## JSONExtractRaw(json\[, indices_or_keys\]…) {#jsonextractrawjson-indices-or-keys} +## JSONExtractRaw(json\[, indices_or_keys\]...) {#jsonextractrawjson-indices-or-keys} 返回JSON的部分。 diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md index 2eeaad63694..9c28ff867c5 100644 --- a/docs/zh/sql-reference/functions/other-functions.md +++ b/docs/zh/sql-reference/functions/other-functions.md @@ -90,7 +90,7 @@ SELECT 'some-file-name' AS a, basename(a) 将一个常量列变为一个非常量列。 在ClickHouse中,非常量列和常量列在内存中的表示方式不同。尽管函数对于常量列和非常量总是返回相同的结果,但它们的工作方式可能完全不同(执行不同的代码)。此函数用于调试这种行为。 -## ignore(…) {#ignore} +## ignore(...) {#ignore} 接受任何参数,包括`NULL`。始终返回0。 但是,函数的参数总是被计算的。该函数可以用于基准测试。 diff --git a/docs/zh/sql-reference/functions/string-functions.md b/docs/zh/sql-reference/functions/string-functions.md index d1914839d7c..c28735c7dc7 100644 --- a/docs/zh/sql-reference/functions/string-functions.md +++ b/docs/zh/sql-reference/functions/string-functions.md @@ -95,7 +95,7 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b') 以Unicode字符为单位反转UTF-8编码的字符串。如果字符串不是UTF-8编码,则可能获取到一个非预期的结果(不会抛出异常)。 -## format(pattern, s0, s1, …) {#formatpattern-s0-s1} +## format(pattern, s0, s1, ...) {#formatpattern-s0-s1} 使用常量字符串`pattern`格式化其他参数。`pattern`字符串中包含由大括号`{}`包围的«替换字段»。 未被包含在大括号中的任何内容都被视为文本内容,它将原样保留在返回值中。 如果你需要在文本内容中包含一个大括号字符,它可以通过加倍来转义:`{{ '{{' }}`和`{{ '{{' }} '}}' }}`。 字段名称可以是数字(从零开始)或空(然后将它们视为连续数字) @@ -113,11 +113,11 @@ SELECT format('{} {}', 'Hello', 'World') └───────────────────────────────────┘ ``` -## concat(s1, s2, …) {#concat-s1-s2} +## concat(s1, s2, ...) {#concat-s1-s2} 将参数中的多个字符串拼接,不带分隔符。 -## concatAssumeInjective(s1, s2, …) {#concatassumeinjectives1-s2} +## concatAssumeInjective(s1, s2, ...) 
{#concatassumeinjectives1-s2} 与[concat](#concat-s1-s2)相同,区别在于,你需要保证concat(s1, s2, s3) -\> s4是单射的,它将用于GROUP BY的优化。 diff --git a/docs/zh/sql-reference/functions/string-search-functions.md b/docs/zh/sql-reference/functions/string-search-functions.md index 972fd84e2a1..8ada76eeeda 100644 --- a/docs/zh/sql-reference/functions/string-search-functions.md +++ b/docs/zh/sql-reference/functions/string-search-functions.md @@ -204,7 +204,7 @@ SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']); **语法** ```sql -multiSearchFirstPosition(haystack, [needle1, needle2, …, needleN]) +multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN]) ``` ## multiSearchFirstIndex @@ -216,7 +216,7 @@ multiSearchFirstPosition(haystack, [needle1, needle2, …, needleN]) **语法** ```sql -multiSearchFirstIndex(haystack, \[needle1, needle2, …, needlen\]) +multiSearchFirstIndex(haystack, \[needle1, needle2, ..., needlen\]) ``` ## multiSearchAny {#multisearchany} @@ -229,7 +229,7 @@ multiSearchFirstIndex(haystack, \[needle1, needle2, …, n **语法** ```sql -multiSearchAny(haystack, [needle1, needle2, …, needleN]) +multiSearchAny(haystack, [needle1, needle2, ..., needleN]) ``` ## match {#match} @@ -273,7 +273,7 @@ Hyperscan 通常容易受到正则表达式拒绝服务 (ReDoS) 攻击。有关 **语法** ```sql -multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAny(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAnyIndex @@ -283,7 +283,7 @@ multiMatchAny(haystack, \[pattern1, pattern2, …, pattern **语法** ```sql -multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAnyIndex(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAllIndices @@ -293,7 +293,7 @@ multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, pa **语法** ```sql -multiMatchAllIndices(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAllIndices(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAny @@ -307,7 +307,7 @@ multiMatchAllIndices(haystack, \[pattern1, pattern2, …, **语法** ```sql -multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAnyIndex @@ -317,7 +317,7 @@ multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern21, pattern2, …, patternn\]) +multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAllIndices @@ -327,7 +327,7 @@ multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2 **语法** ```sql -multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## extract diff --git a/docs/zh/sql-reference/functions/url-functions.md b/docs/zh/sql-reference/functions/url-functions.md index 44880b6ca1a..e7a0354c0bf 100644 --- a/docs/zh/sql-reference/functions/url-functions.md +++ b/docs/zh/sql-reference/functions/url-functions.md @@ -11,7 +11,7 @@ slug: /zh/sql-reference/functions/url-functions ### 协议 {#protocol} -返回URL的协议。例如: http、ftp、mailto、magnet… +返回URL的协议。例如: http、ftp、mailto、magnet... ### 域 {#domain} diff --git a/docs/zh/sql-reference/statements/alter/delete.md b/docs/zh/sql-reference/statements/alter/delete.md index 5eb77c35a93..f0b41c4e214 100644 --- a/docs/zh/sql-reference/statements/alter/delete.md +++ b/docs/zh/sql-reference/statements/alter/delete.md @@ -4,7 +4,7 @@ sidebar_position: 39 sidebar_label: DELETE --- -# ALTER TABLE … DELETE 语句 {#alter-mutations} +# ALTER TABLE ... 
DELETE 语句 {#alter-mutations} ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr diff --git a/docs/zh/sql-reference/statements/alter/index.md b/docs/zh/sql-reference/statements/alter/index.md index e173837a16c..2286dcccd13 100644 --- a/docs/zh/sql-reference/statements/alter/index.md +++ b/docs/zh/sql-reference/statements/alter/index.md @@ -38,7 +38,7 @@ sidebar_label: ALTER ## Mutations 突变 {#mutations} -用来操作表数据的ALTER查询是通过一种叫做“突变”的机制来实现的,最明显的是[ALTER TABLE … DELETE](../../../sql-reference/statements/alter/delete.md)和[ALTER TABLE … UPDATE](../../../sql-reference/statements/alter/update.md)。它们是异步的后台进程,类似于[MergeTree](../../../engines/table-engines/mergetree-family/index.md)表的合并,产生新的“突变”版本的部件。 +用来操作表数据的ALTER查询是通过一种叫做“突变”的机制来实现的,最明显的是[ALTER TABLE ... DELETE](../../../sql-reference/statements/alter/delete.md)和[ALTER TABLE ... UPDATE](../../../sql-reference/statements/alter/update.md)。它们是异步的后台进程,类似于[MergeTree](../../../engines/table-engines/mergetree-family/index.md)表的合并,产生新的“突变”版本的部件。 diff --git a/docs/zh/sql-reference/statements/alter/update.md b/docs/zh/sql-reference/statements/alter/update.md index 97b2b43d889..7cf37401dc5 100644 --- a/docs/zh/sql-reference/statements/alter/update.md +++ b/docs/zh/sql-reference/statements/alter/update.md @@ -4,7 +4,7 @@ sidebar_position: 40 sidebar_label: UPDATE --- -# ALTER TABLE … UPDATE 语句 {#alter-table-update-statements} +# ALTER TABLE ... UPDATE 语句 {#alter-table-update-statements} ``` sql ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr diff --git a/docs/zh/sql-reference/statements/alter/view.md b/docs/zh/sql-reference/statements/alter/view.md index 34a612803c1..a19d918612a 100644 --- a/docs/zh/sql-reference/statements/alter/view.md +++ b/docs/zh/sql-reference/statements/alter/view.md @@ -4,9 +4,9 @@ sidebar_position: 50 sidebar_label: VIEW --- -# ALTER TABLE … MODIFY QUERY 语句 {#alter-modify-query} +# ALTER TABLE ... MODIFY QUERY 语句 {#alter-modify-query} -当使用`ALTER TABLE … MODIFY QUERY`语句创建一个[物化视图](../create/view.md#materialized)时,可以修改`SELECT`查询。当物化视图在没有 `TO [db.]name` 的情况下创建时使用它。必须启用 `allow_experimental_alter_materialized_view_structure`设置。 +当使用`ALTER TABLE ... MODIFY QUERY`语句创建一个[物化视图](../create/view.md#materialized)时,可以修改`SELECT`查询。当物化视图在没有 `TO [db.]name` 的情况下创建时使用它。必须启用 `allow_experimental_alter_materialized_view_structure`设置。 如果一个物化视图使用`TO [db.]name`,你必须先 [DETACH](../detach.mdx) 视图。用[ALTER TABLE](index.md)修改目标表,然后 [ATTACH](../attach.mdx)之前分离的(`DETACH`)视图。 diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index bce0994ecd2..49a1d66bdf1 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -55,7 +55,7 @@ ClickHouse 中的物化视图更像是插入触发器。 如果视图查询中 如果指定`POPULATE`,则在创建视图时将现有表数据插入到视图中,就像创建一个`CREATE TABLE ... 
AS SELECT ...`一样。 否则,查询仅包含创建视图后插入表中的数据。 我们**不建议**使用POPULATE,因为在创建视图期间插入表中的数据不会插入其中。 -`SELECT` 查询可以包含`DISTINCT`、`GROUP BY`、`ORDER BY`、`LIMIT`……请注意,相应的转换是在每个插入数据块上独立执行的。 例如,如果设置了`GROUP BY`,则在插入期间聚合数据,但仅在插入数据的单个数据包内。 数据不会被进一步聚合。 例外情况是使用独立执行数据聚合的`ENGINE`,例如`SummingMergeTree`。 +`SELECT` 查询可以包含`DISTINCT`、`GROUP BY`、`ORDER BY`、`LIMIT`...请注意,相应的转换是在每个插入数据块上独立执行的。 例如,如果设置了`GROUP BY`,则在插入期间聚合数据,但仅在插入数据的单个数据包内。 数据不会被进一步聚合。 例外情况是使用独立执行数据聚合的`ENGINE`,例如`SummingMergeTree`。 在物化视图上执行[ALTER](../../../sql-reference/statements/alter/index.md)查询有局限性,因此可能不方便。 如果物化视图使用构造`TO [db.]name`,你可以`DETACH`视图,为目标表运行`ALTER`,然后`ATTACH`先前分离的(`DETACH`)视图。 diff --git a/docs/zh/sql-reference/statements/insert-into.md b/docs/zh/sql-reference/statements/insert-into.md index f80c0a8a8ea..a08a78b6f1d 100644 --- a/docs/zh/sql-reference/statements/insert-into.md +++ b/docs/zh/sql-reference/statements/insert-into.md @@ -68,7 +68,7 @@ SELECT * FROM insert_select_testtable; INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set ``` -例如,下面的查询所使用的输入格式就与上面INSERT … VALUES的中使用的输入格式相同: +例如,下面的查询所使用的输入格式就与上面INSERT ... VALUES的中使用的输入格式相同: ``` sql INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... diff --git a/docs/zh/sql-reference/statements/select/limit.md b/docs/zh/sql-reference/statements/select/limit.md index 2bbf2949707..795f3f4ecd1 100644 --- a/docs/zh/sql-reference/statements/select/limit.md +++ b/docs/zh/sql-reference/statements/select/limit.md @@ -13,11 +13,11 @@ sidebar_label: LIMIT 如果没有 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 子句显式排序结果,结果的行选择可能是任意的和非确定性的。 -## LIMIT … WITH TIES 修饰符 {#limit-with-ties} +## LIMIT ... WITH TIES 修饰符 {#limit-with-ties} 如果为 `LIMIT n[,m]` 设置了 `WITH TIES` ,并且声明了 `ORDER BY expr_list`, 除了得到无修饰符的结果(正常情况下的 `limit n`, 前n行数据), 还会返回与第`n`行具有相同排序字段的行(即如果第n+1行的字段与第n行 拥有相同的排序字段,同样返回该结果. -此修饰符可以与: [ORDER BY … WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill) 组合使用. +此修饰符可以与: [ORDER BY ... WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill) 组合使用. 例如以下查询: diff --git a/docs/zh/sql-reference/statements/select/order-by.md b/docs/zh/sql-reference/statements/select/order-by.md index 3286fc9f9e7..2f2d9a4959c 100644 --- a/docs/zh/sql-reference/statements/select/order-by.md +++ b/docs/zh/sql-reference/statements/select/order-by.md @@ -89,7 +89,7 @@ SELECT a, b, c FROM t ORDER BY a, b, c ## ORDER BY Expr WITH FILL Modifier {#orderby-with-fill} -此修饰符可以与 [LIMIT … WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties) 进行组合使用. +此修饰符可以与 [LIMIT ... WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties) 进行组合使用. 可以在`ORDER BY expr`之后用可选的`FROM expr`,`TO expr`和`STEP expr`参数来设置`WITH FILL`修饰符。 所有`expr`列的缺失值将被顺序填充,而其他列将被填充为默认值。 diff --git a/docs/zh/sql-reference/table-functions/file.md b/docs/zh/sql-reference/table-functions/file.md index 28682255738..fa1ec12f7df 100644 --- a/docs/zh/sql-reference/table-functions/file.md +++ b/docs/zh/sql-reference/table-functions/file.md @@ -114,7 +114,7 @@ FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32') **示例** -从名为 `file000`, `file001`, … , `file999`的文件中查询数据: +从名为 `file000`, `file001`, ... 
, `file999`的文件中查询数据: ``` sql SELECT count(*) diff --git a/docs/zh/sql-reference/table-functions/hdfs.md b/docs/zh/sql-reference/table-functions/hdfs.md index b10b10ae2d2..f8320d8d0bb 100644 --- a/docs/zh/sql-reference/table-functions/hdfs.md +++ b/docs/zh/sql-reference/table-functions/hdfs.md @@ -84,7 +84,7 @@ FROM hdfs('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value U **示例** -从名为 `file000`, `file001`, … , `file999`的文件中查询数据: +从名为 `file000`, `file001`, ... , `file999`的文件中查询数据: ``` sql SELECT count(*) diff --git a/docs/zh/sql-reference/table-functions/s3.md b/docs/zh/sql-reference/table-functions/s3.md index f7384a7526e..4f2c7299d95 100644 --- a/docs/zh/sql-reference/table-functions/s3.md +++ b/docs/zh/sql-reference/table-functions/s3.md @@ -99,7 +99,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi !!! warning "Warning" 如果文件列表中包含有从零开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`。 -计算名为 `file-000.csv`, `file-001.csv`, … , `file-999.csv` 文件的总行数: +计算名为 `file-000.csv`, `file-001.csv`, ... , `file-999.csv` 文件的总行数: ``` sql SELECT count(*) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 01ed7d70b38..efe23d57478 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1178,7 +1178,7 @@ void Client::processConfig() pager = config().getString("pager", ""); - setDefaultFormatsFromConfiguration(); + setDefaultFormatsAndCompressionFromConfiguration(); global_context->setClientName(std::string(DEFAULT_CLIENT_NAME)); global_context->setQueryKindInitial(); diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 267b725b02b..dba5c2b7d2a 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -182,6 +182,11 @@ std::string Keeper::getDefaultConfigFileName() const return "keeper_config.xml"; } +bool Keeper::allowTextLog() const +{ + return false; +} + void Keeper::handleCustomArguments(const std::string & arg, [[maybe_unused]] const std::string & value) // NOLINT { if (arg == "force-recovery") diff --git a/programs/keeper/Keeper.h b/programs/keeper/Keeper.h index f889ffa595b..c449c40b610 100644 --- a/programs/keeper/Keeper.h +++ b/programs/keeper/Keeper.h @@ -65,6 +65,8 @@ protected: std::string getDefaultConfigFileName() const override; + bool allowTextLog() const override; + private: Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = false) const; diff --git a/programs/library-bridge/LibraryBridgeHandlers.h b/programs/library-bridge/LibraryBridgeHandlers.h index 1db71eb24cb..62fbf2caede 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.h +++ b/programs/library-bridge/LibraryBridgeHandlers.h @@ -23,7 +23,7 @@ public: void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: - static constexpr inline auto FORMAT = "RowBinary"; + static constexpr auto FORMAT = "RowBinary"; const size_t keep_alive_timeout; LoggerPtr log; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 6d1ebf8d30c..4d5cfb09e6a 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -607,7 +607,7 @@ void LocalServer::processConfig() if (config().has("macros")) global_context->setMacros(std::make_unique(config(), "macros", log)); - setDefaultFormatsFromConfiguration(); + setDefaultFormatsAndCompressionFromConfiguration(); /// Sets external authenticators config (LDAP, Kerberos). 
global_context->setExternalAuthenticatorsConfig(config()); diff --git a/programs/server/MetricsTransmitter.h b/programs/server/MetricsTransmitter.h index 23420117b56..24069a60071 100644 --- a/programs/server/MetricsTransmitter.h +++ b/programs/server/MetricsTransmitter.h @@ -56,10 +56,10 @@ private: std::condition_variable cond; std::optional thread; - static inline constexpr auto profile_events_path_prefix = "ClickHouse.ProfileEvents."; - static inline constexpr auto profile_events_cumulative_path_prefix = "ClickHouse.ProfileEventsCumulative."; - static inline constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics."; - static inline constexpr auto asynchronous_metrics_path_prefix = "ClickHouse.AsynchronousMetrics."; + static constexpr auto profile_events_path_prefix = "ClickHouse.ProfileEvents."; + static constexpr auto profile_events_cumulative_path_prefix = "ClickHouse.ProfileEventsCumulative."; + static constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics."; + static constexpr auto asynchronous_metrics_path_prefix = "ClickHouse.AsynchronousMetrics."; }; } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 9c9476d1aa7..223bc1f77e7 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1476,6 +1476,8 @@ try global_context->setMaxTableSizeToDrop(new_server_settings.max_table_size_to_drop); global_context->setMaxPartitionSizeToDrop(new_server_settings.max_partition_size_to_drop); global_context->setMaxTableNumToWarn(new_server_settings.max_table_num_to_warn); + global_context->setMaxViewNumToWarn(new_server_settings.max_view_num_to_warn); + global_context->setMaxDictionaryNumToWarn(new_server_settings.max_dictionary_num_to_warn); global_context->setMaxDatabaseNumToWarn(new_server_settings.max_database_num_to_warn); global_context->setMaxPartNumToWarn(new_server_settings.max_part_num_to_warn); diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index d4fb7afcb78..c21b1d376d9 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -753,10 +753,11 @@ size_t getMaxArraySize() return 0xFFFFFF; } -bool hasLimitArraySize() +bool discardOnLimitReached() { if (auto context = Context::getGlobalContextInstance()) - return context->getServerSettings().aggregate_function_group_array_has_limit_size; + return context->getServerSettings().aggregate_function_group_array_action_when_limit_is_reached + == GroupArrayActionWhenLimitReached::DISCARD; return false; } @@ -767,7 +768,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArray( { assertUnary(name, argument_types); - bool limit_size = hasLimitArraySize(); + bool has_limit = discardOnLimitReached(); UInt64 max_elems = getMaxArraySize(); if (parameters.empty()) @@ -784,14 +785,14 @@ AggregateFunctionPtr createAggregateFunctionGroupArray( (type == Field::Types::UInt64 && parameters[0].get() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - limit_size = true; + has_limit = true; max_elems = parameters[0].get(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of parameters for aggregate function {}, should be 0 or 1", name); - if (!limit_size) + if (!has_limit) { if (Tlast) throw Exception(ErrorCodes::BAD_ARGUMENTS, "groupArrayLast make sense only with max_elems (groupArrayLast(max_elems)())"); diff --git 
a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp index bed10333af0..b3824720b04 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp +++ b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp @@ -341,7 +341,7 @@ public: value[i] = Node::read(buf, arena); } - inline std::optional getBaseIndex(Data & data) const + std::optional getBaseIndex(Data & data) const { if (data.value.size() == 0) return {}; diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionIf.cpp b/src/AggregateFunctions/Combinators/AggregateFunctionIf.cpp index 9b5ee79a533..3e21ffa3418 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionIf.cpp +++ b/src/AggregateFunctions/Combinators/AggregateFunctionIf.cpp @@ -73,7 +73,7 @@ private: using Base = AggregateFunctionNullBase>; - inline bool singleFilter(const IColumn ** columns, size_t row_num) const + bool singleFilter(const IColumn ** columns, size_t row_num) const { const IColumn * filter_column = columns[num_arguments - 1]; @@ -261,7 +261,7 @@ public: filter_is_only_null = arguments.back()->onlyNull(); } - static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments) + static bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments) { return assert_cast(*columns[num_arguments - 1]).getData()[row_num]; } diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index 9d84f079daa..d5a4f6b576a 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -138,7 +138,7 @@ class QuantileTDigest compress(); } - inline bool canBeMerged(const BetterFloat & l_mean, const Value & r_mean) + bool canBeMerged(const BetterFloat & l_mean, const Value & r_mean) { return l_mean == r_mean || (!std::isinf(l_mean) && !std::isinf(r_mean)); } diff --git a/src/AggregateFunctions/QuantileTiming.h b/src/AggregateFunctions/QuantileTiming.h index 45fbf38258f..eef15828fc0 100644 --- a/src/AggregateFunctions/QuantileTiming.h +++ b/src/AggregateFunctions/QuantileTiming.h @@ -262,7 +262,7 @@ namespace detail UInt64 count_big[BIG_SIZE]; /// Get value of quantile by index in array `count_big`. - static inline UInt16 indexInBigToValue(size_t i) + static UInt16 indexInBigToValue(size_t i) { return (i * BIG_PRECISION) + SMALL_THRESHOLD + (intHash32<0>(i) % BIG_PRECISION - (BIG_PRECISION / 2)); /// A small randomization so that it is not noticeable that all the values are even. 
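The `inline` removals in the hunks above (MetricsTransmitter.h, AggregateFunctionSequenceNextNode.cpp, AggregateFunctionIf.cpp, QuantileTDigest.h, QuantileTiming.h) are behaviour-neutral cleanups. As a rough standalone sketch with hypothetical names, not ClickHouse code: a member function defined inside the class body is already implicitly inline, and since C++17 a `constexpr` static data member is an implicitly inline variable, so spelling out `inline` there adds nothing.

```cpp
// Hypothetical example (not ClickHouse code) of why the removed `inline` keywords were redundant.
#include <cstddef>
#include <iostream>

struct Example
{
    // Implicitly inline: the function is defined inside the class definition.
    std::size_t bufSize() const { return 1ULL << size_degree; }

    // Implicitly inline since C++17: constexpr static data member.
    static constexpr auto path_prefix = "ClickHouse.Metrics.";

    std::size_t size_degree = 4;
};

int main()
{
    Example e;
    // Prints: 16 ClickHouse.Metrics.
    std::cout << e.bufSize() << ' ' << Example::path_prefix << '\n';
    return 0;
}
```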
diff --git a/src/AggregateFunctions/ThetaSketchData.h b/src/AggregateFunctions/ThetaSketchData.h index f32386d945b..99dca27673d 100644 --- a/src/AggregateFunctions/ThetaSketchData.h +++ b/src/AggregateFunctions/ThetaSketchData.h @@ -24,14 +24,14 @@ private: std::unique_ptr sk_update; std::unique_ptr sk_union; - inline datasketches::update_theta_sketch * getSkUpdate() + datasketches::update_theta_sketch * getSkUpdate() { if (!sk_update) sk_update = std::make_unique(datasketches::update_theta_sketch::builder().build()); return sk_update.get(); } - inline datasketches::theta_union * getSkUnion() + datasketches::theta_union * getSkUnion() { if (!sk_union) sk_union = std::make_unique(datasketches::theta_union::builder().build()); diff --git a/src/AggregateFunctions/UniqVariadicHash.h b/src/AggregateFunctions/UniqVariadicHash.h index 840380e7f0f..5bb245397d4 100644 --- a/src/AggregateFunctions/UniqVariadicHash.h +++ b/src/AggregateFunctions/UniqVariadicHash.h @@ -38,7 +38,7 @@ bool isAllArgumentsContiguousInMemory(const DataTypes & argument_types); template <> struct UniqVariadicHash { - static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) + static UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) { UInt64 hash; @@ -65,7 +65,7 @@ struct UniqVariadicHash template <> struct UniqVariadicHash { - static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) + static UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) { UInt64 hash; @@ -94,7 +94,7 @@ struct UniqVariadicHash template <> struct UniqVariadicHash { - static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) + static UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) { const IColumn ** column = columns; const IColumn ** columns_end = column + num_args; @@ -114,7 +114,7 @@ struct UniqVariadicHash template <> struct UniqVariadicHash { - static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) + static UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) { const auto & tuple_columns = assert_cast(columns[0])->getColumns(); diff --git a/src/AggregateFunctions/UniquesHashSet.h b/src/AggregateFunctions/UniquesHashSet.h index d6fc2bb6634..d5241547711 100644 --- a/src/AggregateFunctions/UniquesHashSet.h +++ b/src/AggregateFunctions/UniquesHashSet.h @@ -105,14 +105,14 @@ private: } } - inline size_t buf_size() const { return 1ULL << size_degree; } /// NOLINT - inline size_t max_fill() const { return 1ULL << (size_degree - 1); } /// NOLINT - inline size_t mask() const { return buf_size() - 1; } + size_t buf_size() const { return 1ULL << size_degree; } /// NOLINT + size_t max_fill() const { return 1ULL << (size_degree - 1); } /// NOLINT + size_t mask() const { return buf_size() - 1; } - inline size_t place(HashValue x) const { return (x >> UNIQUES_HASH_BITS_FOR_SKIP) & mask(); } + size_t place(HashValue x) const { return (x >> UNIQUES_HASH_BITS_FOR_SKIP) & mask(); } /// The value is divided by 2 ^ skip_degree - inline bool good(HashValue hash) const { return hash == ((hash >> skip_degree) << skip_degree); } + bool good(HashValue hash) const { return hash == ((hash >> skip_degree) << skip_degree); } HashValue hash(Value key) const { return static_cast(Hash()(key)); } diff --git a/src/Analyzer/ArrayJoinNode.cpp b/src/Analyzer/ArrayJoinNode.cpp index 59389d4f2a8..27d7229d46a 100644 --- a/src/Analyzer/ArrayJoinNode.cpp +++ b/src/Analyzer/ArrayJoinNode.cpp 
@@ -24,6 +24,9 @@ void ArrayJoinNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_stat buffer << std::string(indent, ' ') << "ARRAY_JOIN id: " << format_state.getNodeId(this); buffer << ", is_left: " << is_left; + if (hasAlias()) + buffer << ", alias: " << getAlias(); + buffer << '\n' << std::string(indent + 2, ' ') << "TABLE EXPRESSION\n"; getTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4); diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index f96ba22eb7a..9153bc4eca2 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -173,13 +173,13 @@ private: return arithmetic_function_clone; } - inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const + void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { auto function = FunctionFactory::instance().get(function_name, getContext()); function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); } - static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name) + static void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name) { auto function_aggregate_function = function_node.getAggregateFunction(); diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index f8233f473f8..ebefc12ae53 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -184,7 +184,7 @@ private: return result_function; } - inline QueryTreeNodePtr makeEqualsFunction(QueryTreeNodePtr lhs_argument, QueryTreeNodePtr rhs_argument) const + QueryTreeNodePtr makeEqualsFunction(QueryTreeNodePtr lhs_argument, QueryTreeNodePtr rhs_argument) const { return makeComparisonFunction(std::move(lhs_argument), std::move(rhs_argument), "equals"); } diff --git a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp index 96bc62212fd..5951e8fc5ea 100644 --- a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp +++ b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp @@ -99,6 +99,23 @@ bool checkIfGroupAlwaysTrueGraph(const Analyzer::CNF::OrGroup & group, const Com return false; } +bool checkIfGroupAlwaysTrueAtoms(const Analyzer::CNF::OrGroup & group) +{ + /// Filters out groups containing mutually exclusive atoms, + /// since these groups are always True + + for (const auto & atom : group) + { + auto negated(atom); + negated.negative = !atom.negative; + if (group.contains(negated)) + { + return true; + } + } + return false; +} + bool checkIfAtomAlwaysFalseFullMatch(const Analyzer::CNF::AtomicFormula & atom, const ConstraintsDescription::QueryTreeData & query_tree_constraints) { const auto constraint_atom_ids = query_tree_constraints.getAtomIds(atom.node_with_hash); @@ -644,7 +661,8 @@ void optimizeWithConstraints(Analyzer::CNF & cnf, const QueryTreeNodes & table_e cnf.filterAlwaysTrueGroups([&](const auto & group) { /// remove always true groups from CNF - return !checkIfGroupAlwaysTrueFullMatch(group, query_tree_constraints) && !checkIfGroupAlwaysTrueGraph(group, compare_graph); + return 
!checkIfGroupAlwaysTrueFullMatch(group, query_tree_constraints) + && !checkIfGroupAlwaysTrueGraph(group, compare_graph) && !checkIfGroupAlwaysTrueAtoms(group); }) .filterAlwaysFalseAtoms([&](const Analyzer::CNF::AtomicFormula & atom) { diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 6248f462979..15ac8d642a4 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -215,7 +215,7 @@ public: } private: - inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const + void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { auto function = FunctionFactory::instance().get(function_name, getContext()); function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index 0d6f3fc2d87..e70e08e65f4 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -59,7 +59,7 @@ public: } } private: - static inline void resolveAsCountAggregateFunction(FunctionNode & function_node) + static void resolveAsCountAggregateFunction(FunctionNode & function_node) { AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties); diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index dae17e18b85..43edaaa53fd 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -586,6 +586,89 @@ private: std::unordered_map alias_name_to_expressions; }; +struct ScopeAliases +{ + /// Alias name to query expression node + std::unordered_map alias_name_to_expression_node_before_group_by; + std::unordered_map alias_name_to_expression_node_after_group_by; + + std::unordered_map * alias_name_to_expression_node = nullptr; + + /// Alias name to lambda node + std::unordered_map alias_name_to_lambda_node; + + /// Alias name to table expression node + std::unordered_map alias_name_to_table_expression_node; + + /// Expressions like `x as y` where we can't say whether it's a function, expression or table. + std::unordered_map transitive_aliases; + + /// Nodes with duplicated aliases + std::unordered_set nodes_with_duplicated_aliases; + std::vector cloned_nodes_with_duplicated_aliases; + + /// Names which are aliases from ARRAY JOIN. + /// This is needed to properly qualify columns from matchers and avoid name collision. 
+ std::unordered_set array_join_aliases; + + std::unordered_map & getAliasMap(IdentifierLookupContext lookup_context) + { + switch (lookup_context) + { + case IdentifierLookupContext::EXPRESSION: return *alias_name_to_expression_node; + case IdentifierLookupContext::FUNCTION: return alias_name_to_lambda_node; + case IdentifierLookupContext::TABLE_EXPRESSION: return alias_name_to_table_expression_node; + } + } + + enum class FindOption + { + FIRST_NAME, + FULL_NAME, + }; + + const std::string & getKey(const Identifier & identifier, FindOption find_option) + { + switch (find_option) + { + case FindOption::FIRST_NAME: return identifier.front(); + case FindOption::FULL_NAME: return identifier.getFullName(); + } + } + + QueryTreeNodePtr * find(IdentifierLookup lookup, FindOption find_option) + { + auto & alias_map = getAliasMap(lookup.lookup_context); + const std::string * key = &getKey(lookup.identifier, find_option); + + auto it = alias_map.find(*key); + + if (it != alias_map.end()) + return &it->second; + + if (lookup.lookup_context == IdentifierLookupContext::TABLE_EXPRESSION) + return {}; + + while (it == alias_map.end()) + { + auto jt = transitive_aliases.find(*key); + if (jt == transitive_aliases.end()) + return {}; + + key = &(getKey(jt->second, find_option)); + it = alias_map.find(*key); + } + + return &it->second; + } + + const QueryTreeNodePtr * find(IdentifierLookup lookup, FindOption find_option) const + { + return const_cast(this)->find(lookup, find_option); + } +}; + + /** Projection names is name of query tree node that is used in projection part of query node. * Example: SELECT id FROM test_table; * `id` is projection name of column node @@ -731,7 +814,7 @@ struct IdentifierResolveScope else if (parent_scope) join_use_nulls = parent_scope->join_use_nulls; - alias_name_to_expression_node = &alias_name_to_expression_node_before_group_by; + aliases.alias_name_to_expression_node = &aliases.alias_name_to_expression_node_before_group_by; } QueryTreeNodePtr scope_node; @@ -746,17 +829,7 @@ struct IdentifierResolveScope /// Argument can be expression like constant, column, function or table expression std::unordered_map expression_argument_name_to_node; - /// Alias name to query expression node - std::unordered_map alias_name_to_expression_node_before_group_by; - std::unordered_map alias_name_to_expression_node_after_group_by; - - std::unordered_map * alias_name_to_expression_node = nullptr; - - /// Alias name to lambda node - std::unordered_map alias_name_to_lambda_node; - - /// Alias name to table expression node - std::unordered_map alias_name_to_table_expression_node; + ScopeAliases aliases; /// Table column name to column node. Valid only during table ALIAS columns resolve. 
ColumnNameToColumnNodeMap column_name_to_column_node; @@ -767,10 +840,6 @@ struct IdentifierResolveScope /// Window name to window node std::unordered_map window_name_to_window_node; - /// Nodes with duplicated aliases - std::unordered_set nodes_with_duplicated_aliases; - std::vector cloned_nodes_with_duplicated_aliases; - /// Current scope expression in resolve process stack ExpressionsStack expressions_in_resolve_process_stack; @@ -889,7 +958,7 @@ struct IdentifierResolveScope bool had_aggregate_function = expressions_in_resolve_process_stack.hasAggregateFunction(); expressions_in_resolve_process_stack.push(node); if (group_by_use_nulls && had_aggregate_function != expressions_in_resolve_process_stack.hasAggregateFunction()) - alias_name_to_expression_node = &alias_name_to_expression_node_before_group_by; + aliases.alias_name_to_expression_node = &aliases.alias_name_to_expression_node_before_group_by; } void popExpressionNode() @@ -897,7 +966,7 @@ struct IdentifierResolveScope bool had_aggregate_function = expressions_in_resolve_process_stack.hasAggregateFunction(); expressions_in_resolve_process_stack.pop(); if (group_by_use_nulls && had_aggregate_function != expressions_in_resolve_process_stack.hasAggregateFunction()) - alias_name_to_expression_node = &alias_name_to_expression_node_after_group_by; + aliases.alias_name_to_expression_node = &aliases.alias_name_to_expression_node_after_group_by; } /// Dump identifier resolve scope @@ -916,16 +985,16 @@ struct IdentifierResolveScope for (const auto & [alias_name, node] : expression_argument_name_to_node) buffer << "Alias name " << alias_name << " node " << node->formatASTForErrorMessage() << '\n'; - buffer << "Alias name to expression node table size " << alias_name_to_expression_node->size() << '\n'; - for (const auto & [alias_name, node] : *alias_name_to_expression_node) + buffer << "Alias name to expression node table size " << aliases.alias_name_to_expression_node->size() << '\n'; + for (const auto & [alias_name, node] : *aliases.alias_name_to_expression_node) buffer << "Alias name " << alias_name << " expression node " << node->dumpTree() << '\n'; - buffer << "Alias name to function node table size " << alias_name_to_lambda_node.size() << '\n'; - for (const auto & [alias_name, node] : alias_name_to_lambda_node) + buffer << "Alias name to function node table size " << aliases.alias_name_to_lambda_node.size() << '\n'; + for (const auto & [alias_name, node] : aliases.alias_name_to_lambda_node) buffer << "Alias name " << alias_name << " lambda node " << node->formatASTForErrorMessage() << '\n'; - buffer << "Alias name to table expression node table size " << alias_name_to_table_expression_node.size() << '\n'; - for (const auto & [alias_name, node] : alias_name_to_table_expression_node) + buffer << "Alias name to table expression node table size " << aliases.alias_name_to_table_expression_node.size() << '\n'; + for (const auto & [alias_name, node] : aliases.alias_name_to_table_expression_node) buffer << "Alias name " << alias_name << " node " << node->formatASTForErrorMessage() << '\n'; buffer << "CTE name to query node table size " << cte_name_to_query_node.size() << '\n'; @@ -936,8 +1005,8 @@ struct IdentifierResolveScope for (const auto & [window_name, node] : window_name_to_window_node) buffer << "CTE name " << window_name << " node " << node->formatASTForErrorMessage() << '\n'; - buffer << "Nodes with duplicated aliases size " << nodes_with_duplicated_aliases.size() << '\n'; - for (const auto & node : nodes_with_duplicated_aliases) + 
buffer << "Nodes with duplicated aliases size " << aliases.nodes_with_duplicated_aliases.size() << '\n'; + for (const auto & node : aliases.nodes_with_duplicated_aliases) buffer << "Alias name " << node->getAlias() << " node " << node->formatASTForErrorMessage() << '\n'; buffer << "Expression resolve process stack " << '\n'; @@ -996,8 +1065,8 @@ struct IdentifierResolveScope class QueryExpressionsAliasVisitor : public InDepthQueryTreeVisitor { public: - explicit QueryExpressionsAliasVisitor(IdentifierResolveScope & scope_) - : scope(scope_) + explicit QueryExpressionsAliasVisitor(ScopeAliases & aliases_) + : aliases(aliases_) {} void visitImpl(QueryTreeNodePtr & node) @@ -1034,10 +1103,10 @@ public: private: void addDuplicatingAlias(const QueryTreeNodePtr & node) { - scope.nodes_with_duplicated_aliases.emplace(node); + aliases.nodes_with_duplicated_aliases.emplace(node); auto cloned_node = node->clone(); - scope.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); - scope.nodes_with_duplicated_aliases.emplace(cloned_node); + aliases.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); + aliases.nodes_with_duplicated_aliases.emplace(cloned_node); } void updateAliasesIfNeeded(const QueryTreeNodePtr & node, bool is_lambda_node) @@ -1053,29 +1122,29 @@ private: if (is_lambda_node) { - if (scope.alias_name_to_expression_node->contains(alias)) + if (aliases.alias_name_to_expression_node->contains(alias)) addDuplicatingAlias(node); - auto [_, inserted] = scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); + auto [_, inserted] = aliases.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); if (!inserted) addDuplicatingAlias(node); return; } - if (scope.alias_name_to_lambda_node.contains(alias)) - addDuplicatingAlias(node); + if (aliases.alias_name_to_lambda_node.contains(alias)) + addDuplicatingAlias(node); - auto [_, inserted] = scope.alias_name_to_expression_node->insert(std::make_pair(alias, node)); + auto [_, inserted] = aliases.alias_name_to_expression_node->insert(std::make_pair(alias, node)); if (!inserted) - addDuplicatingAlias(node); + addDuplicatingAlias(node); - /// If node is identifier put it also in scope alias name to lambda node map - if (node->getNodeType() == QueryTreeNodeType::IDENTIFIER) - scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); + /// If node is identifier put it into transitive aliases map. + if (const auto * identifier = typeid_cast(node.get())) + aliases.transitive_aliases.insert(std::make_pair(alias, identifier->getIdentifier())); } - IdentifierResolveScope & scope; + ScopeAliases & aliases; }; class TableExpressionsAliasVisitor : public InDepthQueryTreeVisitor @@ -1122,7 +1191,7 @@ private: return; const auto & node_alias = node->getAlias(); - auto [_, inserted] = scope.alias_name_to_table_expression_node.emplace(node_alias, node); + auto [_, inserted] = scope.aliases.alias_name_to_table_expression_node.emplace(node_alias, node); if (!inserted) throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, "Multiple table expressions with same alias {}. 
In scope {}", @@ -1193,7 +1262,7 @@ public: } case QueryTreeNodeType::TABLE_FUNCTION: { - QueryExpressionsAliasVisitor expressions_alias_visitor(scope); + QueryExpressionsAliasVisitor expressions_alias_visitor(scope.aliases); resolveTableFunction(node, scope, expressions_alias_visitor, false /*nested_table_function*/); break; } @@ -1461,7 +1530,7 @@ private: ProjectionNames resolveFunction(QueryTreeNodePtr & function_node, IdentifierResolveScope & scope); - ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); + ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias = false); ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); @@ -1868,7 +1937,7 @@ void QueryAnalyzer::collectScopeValidIdentifiersForTypoCorrection( if (allow_expression_identifiers) { - for (const auto & [name, expression] : *scope.alias_name_to_expression_node) + for (const auto & [name, expression] : *scope.aliases.alias_name_to_expression_node) { assert(expression); auto expression_identifier = Identifier(name); @@ -1898,13 +1967,13 @@ void QueryAnalyzer::collectScopeValidIdentifiersForTypoCorrection( { if (allow_function_identifiers) { - for (const auto & [name, _] : *scope.alias_name_to_expression_node) + for (const auto & [name, _] : *scope.aliases.alias_name_to_expression_node) valid_identifiers_result.insert(Identifier(name)); } if (allow_table_expression_identifiers) { - for (const auto & [name, _] : scope.alias_name_to_table_expression_node) + for (const auto & [name, _] : scope.aliases.alias_name_to_table_expression_node) valid_identifiers_result.insert(Identifier(name)); } } @@ -2793,21 +2862,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromExpressionArguments(cons bool QueryAnalyzer::tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope) { - const auto & identifier_bind_part = identifier_lookup.identifier.front(); - - auto get_alias_name_to_node_map = [&]() -> const std::unordered_map & - { - if (identifier_lookup.isExpressionLookup()) - return *scope.alias_name_to_expression_node; - else if (identifier_lookup.isFunctionLookup()) - return scope.alias_name_to_lambda_node; - - return scope.alias_name_to_table_expression_node; - }; - - const auto & alias_name_to_node_map = get_alias_name_to_node_map(); - - return alias_name_to_node_map.contains(identifier_bind_part); + return scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME) != nullptr || scope.aliases.array_join_aliases.contains(identifier_lookup.identifier.front()); } /** Resolve identifier from scope aliases. 
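The `ScopeAliases::find` helper used above replaces the three ad-hoc alias maps with one lookup that also follows chains of `x AS y` forwards via `transitive_aliases`. A simplified, self-contained sketch of that transitive lookup, using hypothetical string-based types rather than the real analyzer classes:

```cpp
// Hypothetical, simplified model of the transitive alias lookup:
// resolved aliases live in `alias_to_node`; identifiers that merely forward to another
// name live in `transitive`; the lookup follows the chain until it reaches a resolved
// node or runs out of links.
#include <iostream>
#include <optional>
#include <string>
#include <unordered_map>

std::optional<std::string> findAlias(
    const std::unordered_map<std::string, std::string> & alias_to_node,
    const std::unordered_map<std::string, std::string> & transitive,
    std::string key)
{
    for (;;)
    {
        if (auto it = alias_to_node.find(key); it != alias_to_node.end())
            return it->second;            // reached a resolved expression node

        auto jt = transitive.find(key);
        if (jt == transitive.end())
            return std::nullopt;          // chain broken: unknown alias
        key = jt->second;                 // follow `key AS ...` one step further
    }
}

int main()
{
    // e.g. SELECT a AS b, b AS c ...: `c` forwards to `b`, which resolves to expression `a`.
    std::unordered_map<std::string, std::string> alias_to_node{{"b", "expr(a)"}};
    std::unordered_map<std::string, std::string> transitive{{"c", "b"}};
    std::cout << findAlias(alias_to_node, transitive, "c").value_or("<not found>") << '\n'; // expr(a)
    return 0;
}
```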
@@ -2857,23 +2912,13 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier { const auto & identifier_bind_part = identifier_lookup.identifier.front(); - auto get_alias_name_to_node_map = [&]() -> std::unordered_map & - { - if (identifier_lookup.isExpressionLookup()) - return *scope.alias_name_to_expression_node; - else if (identifier_lookup.isFunctionLookup()) - return scope.alias_name_to_lambda_node; - - return scope.alias_name_to_table_expression_node; - }; - - auto & alias_name_to_node_map = get_alias_name_to_node_map(); - auto it = alias_name_to_node_map.find(identifier_bind_part); - - if (it == alias_name_to_node_map.end()) + auto * it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME); + if (it == nullptr) return {}; - if (!it->second) + QueryTreeNodePtr & alias_node = *it; + + if (!alias_node) throw Exception(ErrorCodes::LOGICAL_ERROR, "Node with alias {} is not valid. In scope {}", identifier_bind_part, @@ -2893,14 +2938,14 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier return {}; } - auto node_type = it->second->getNodeType(); + auto node_type = alias_node->getNodeType(); /// Resolve expression if necessary if (node_type == QueryTreeNodeType::IDENTIFIER) { - scope.pushExpressionNode(it->second); + scope.pushExpressionNode(alias_node); - auto & alias_identifier_node = it->second->as(); + auto & alias_identifier_node = alias_node->as(); auto identifier = alias_identifier_node.getIdentifier(); auto lookup_result = tryResolveIdentifier(IdentifierLookup{identifier, identifier_lookup.lookup_context}, scope, identifier_resolve_settings); if (!lookup_result.resolved_identifier) @@ -2916,43 +2961,27 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier getHintsErrorMessageSuffix(hints)); } - it->second = lookup_result.resolved_identifier; - - /** During collection of aliases if node is identifier and has alias, we cannot say if it is - * column or function node. Check QueryExpressionsAliasVisitor documentation for clarification. - * - * If we resolved identifier node as expression, we must remove identifier node alias from - * function alias map. - * If we resolved identifier node as function, we must remove identifier node alias from - * expression alias map. 
- */ - if (identifier_lookup.isExpressionLookup()) - scope.alias_name_to_lambda_node.erase(identifier_bind_part); - else if (identifier_lookup.isFunctionLookup()) - scope.alias_name_to_expression_node->erase(identifier_bind_part); - + alias_node = lookup_result.resolved_identifier; scope.popExpressionNode(); } else if (node_type == QueryTreeNodeType::FUNCTION) { - resolveExpressionNode(it->second, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + resolveExpressionNode(alias_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); } else if (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION) { if (identifier_resolve_settings.allow_to_resolve_subquery_during_identifier_resolution) - resolveExpressionNode(it->second, scope, false /*allow_lambda_expression*/, identifier_lookup.isTableExpressionLookup() /*allow_table_expression*/); + resolveExpressionNode(alias_node, scope, false /*allow_lambda_expression*/, identifier_lookup.isTableExpressionLookup() /*allow_table_expression*/); } - QueryTreeNodePtr result = it->second; - - if (identifier_lookup.identifier.isCompound() && result) + if (identifier_lookup.identifier.isCompound() && alias_node) { if (identifier_lookup.isExpressionLookup()) { return tryResolveIdentifierFromCompoundExpression( identifier_lookup.identifier, 1 /*identifier_bind_size*/, - it->second, + alias_node, {} /* compound_expression_source */, scope, identifier_resolve_settings.allow_to_check_join_tree /* can_be_not_found */); @@ -2967,7 +2996,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier } } - return result; + return alias_node; } /** Resolve identifier from table columns. @@ -3864,12 +3893,40 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi { auto & array_join_column_expression_typed = array_join_column_expression->as(); - if (array_join_column_expression_typed.getAlias() == identifier_lookup.identifier.getFullName()) + IdentifierView identifier_view(identifier_lookup.identifier); + + if (identifier_view.isCompound() && from_array_join_node.hasAlias() && identifier_view.front() == from_array_join_node.getAlias()) + identifier_view.popFirst(); + + const auto & alias_or_name = array_join_column_expression_typed.hasAlias() + ? array_join_column_expression_typed.getAlias() + : array_join_column_expression_typed.getColumnName(); + + if (identifier_view.front() == alias_or_name) + identifier_view.popFirst(); + else if (identifier_view.getFullName() == alias_or_name) + identifier_view.popFirst(identifier_view.getPartsSize()); /// Clear + else + continue; + + if (identifier_view.empty()) { auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), array_join_column_expression_typed.getColumnSource()); return array_join_column; } + + /// Resolve subcolumns. Example : SELECT x.y.z FROM tab ARRAY JOIN arr AS x + auto compound_expr = tryResolveIdentifierFromCompoundExpression( + identifier_lookup.identifier, + identifier_lookup.identifier.getPartsSize() - identifier_view.getPartsSize() /*identifier_bind_size*/, + array_join_column_expression, + {} /* compound_expression_source */, + scope, + true /* can_be_not_found */); + + if (compound_expr) + return compound_expr; } if (!resolved_identifier) @@ -4128,10 +4185,11 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook * SELECT id FROM ( SELECT ... 
) AS subquery ARRAY JOIN [0] AS id INNER JOIN second_table USING (id) * In the example, identifier `id` should be resolved into one from USING (id) column. */ - auto alias_it = scope.alias_name_to_expression_node->find(identifier_lookup.identifier.getFullName()); - if (alias_it != scope.alias_name_to_expression_node->end() && alias_it->second->getNodeType() == QueryTreeNodeType::COLUMN) + + auto * alias_it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FULL_NAME); + if (alias_it && (*alias_it)->getNodeType() == QueryTreeNodeType::COLUMN) { - const auto & column_node = alias_it->second->as(); + const auto & column_node = (*alias_it)->as(); if (column_node.getColumnSource()->getNodeType() == QueryTreeNodeType::ARRAY_JOIN) prefer_column_name_to_alias = true; } @@ -4617,6 +4675,36 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher( std::unordered_set table_expression_column_names_to_skip; + QueryTreeNodesWithNames result; + + if (matcher_node_typed.getMatcherType() == MatcherNodeType::COLUMNS_LIST) + { + auto identifiers = matcher_node_typed.getColumnsIdentifiers(); + result.reserve(identifiers.size()); + + for (const auto & identifier : identifiers) + { + auto resolve_result = tryResolveIdentifier(IdentifierLookup{identifier, IdentifierLookupContext::EXPRESSION}, scope); + if (!resolve_result.isResolved()) + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, + "Unknown identifier '{}' inside COLUMNS matcher. In scope {}", + identifier.getFullName(), scope.dump()); + + // TODO: Introduce IdentifierLookupContext::COLUMN and get rid of this check + auto * resolved_column = resolve_result.resolved_identifier->as(); + if (!resolved_column) + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, + "Identifier '{}' inside COLUMNS matcher must resolve into a column, but got {}. In scope {}", + identifier.getFullName(), + resolve_result.resolved_identifier->getNodeTypeName(), + scope.scope_node->formatASTForErrorMessage()); + result.emplace_back(resolve_result.resolved_identifier, resolved_column->getColumnName()); + } + return result; + } + + result.resize(matcher_node_typed.getColumnsIdentifiers().size()); + for (auto & table_expression : table_expressions_stack) { bool table_expression_in_resolve_process = nearest_query_scope->table_expressions_in_resolve_process.contains(table_expression.get()); @@ -4784,8 +4872,6 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher( table_expressions_column_nodes_with_names_stack.push_back(std::move(matched_column_nodes_with_names)); } - QueryTreeNodesWithNames result; - for (auto & table_expression_column_nodes_with_names : table_expressions_column_nodes_with_names_stack) { for (auto && table_expression_column_node_with_name : table_expression_column_nodes_with_names) @@ -5236,7 +5322,7 @@ ProjectionNames QueryAnalyzer::resolveLambda(const QueryTreeNodePtr & lambda_nod scope.scope_node->formatASTForErrorMessage()); /// Initialize aliases in lambda scope - QueryExpressionsAliasVisitor visitor(scope); + QueryExpressionsAliasVisitor visitor(scope.aliases); visitor.visit(lambda_to_resolve.getExpression()); /** Replace lambda arguments with new arguments. @@ -5256,8 +5342,8 @@ ProjectionNames QueryAnalyzer::resolveLambda(const QueryTreeNodePtr & lambda_nod const auto & lambda_argument_name = lambda_argument_identifier ? 
lambda_argument_identifier->getIdentifier().getFullName() : lambda_argument_column->getColumnName(); - bool has_expression_node = scope.alias_name_to_expression_node->contains(lambda_argument_name); - bool has_alias_node = scope.alias_name_to_lambda_node.contains(lambda_argument_name); + bool has_expression_node = scope.aliases.alias_name_to_expression_node->contains(lambda_argument_name); + bool has_alias_node = scope.aliases.alias_name_to_lambda_node.contains(lambda_argument_name); if (has_expression_node || has_alias_node) { @@ -5933,7 +6019,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi function_names = AggregateFunctionFactory::instance().getAllRegisteredNames(); possible_function_names.insert(possible_function_names.end(), function_names.begin(), function_names.end()); - for (auto & [name, lambda_node] : scope.alias_name_to_lambda_node) + for (auto & [name, lambda_node] : scope.aliases.alias_name_to_lambda_node) { if (lambda_node->getNodeType() == QueryTreeNodeType::LAMBDA) possible_function_names.push_back(name); @@ -6230,7 +6316,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi * * 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process. */ -ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression) +ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias) { checkStackSize(); @@ -6267,7 +6353,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id result_projection_names.push_back(node_alias); } - bool is_duplicated_alias = scope.nodes_with_duplicated_aliases.contains(node); + bool is_duplicated_alias = scope.aliases.nodes_with_duplicated_aliases.contains(node); if (is_duplicated_alias) scope.non_cached_identifier_lookups_during_expression_resolve.insert({Identifier{node_alias}, IdentifierLookupContext::EXPRESSION}); @@ -6280,7 +6366,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id * To support both (SELECT 1) AS expression in projection and (SELECT 1) as subquery in IN, do not use * alias table because in alias table subquery could be evaluated as scalar. */ - bool use_alias_table = true; + bool use_alias_table = !ignore_alias; if (is_duplicated_alias || (allow_table_expression && isSubqueryNodeType(node->getNodeType()))) use_alias_table = false; @@ -6291,14 +6377,14 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id * * To resolve b we need to resolve a. 
*/ - auto it = scope.alias_name_to_expression_node->find(node_alias); - if (it != scope.alias_name_to_expression_node->end()) + auto it = scope.aliases.alias_name_to_expression_node->find(node_alias); + if (it != scope.aliases.alias_name_to_expression_node->end()) node = it->second; if (allow_lambda_expression) { - it = scope.alias_name_to_lambda_node.find(node_alias); - if (it != scope.alias_name_to_lambda_node.end()) + it = scope.aliases.alias_name_to_lambda_node.find(node_alias); + if (it != scope.aliases.alias_name_to_lambda_node.end()) node = it->second; } } @@ -6324,17 +6410,9 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id result_projection_names.push_back(projection_name_it->second); } - if (resolved_identifier_node && !node_alias.empty()) - scope.alias_name_to_lambda_node.erase(node_alias); - if (!resolved_identifier_node && allow_lambda_expression) - { resolved_identifier_node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::FUNCTION}, scope).resolved_identifier; - if (resolved_identifier_node && !node_alias.empty()) - scope.alias_name_to_expression_node->erase(node_alias); - } - if (!resolved_identifier_node && allow_table_expression) { resolved_identifier_node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::TABLE_EXPRESSION}, scope).resolved_identifier; @@ -6573,14 +6651,14 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id */ if (!node_alias.empty() && use_alias_table && !scope.group_by_use_nulls) { - auto it = scope.alias_name_to_expression_node->find(node_alias); - if (it != scope.alias_name_to_expression_node->end()) + auto it = scope.aliases.alias_name_to_expression_node->find(node_alias); + if (it != scope.aliases.alias_name_to_expression_node->end()) it->second = node; if (allow_lambda_expression) { - it = scope.alias_name_to_lambda_node.find(node_alias); - if (it != scope.alias_name_to_lambda_node.end()) + it = scope.aliases.alias_name_to_lambda_node.find(node_alias); + if (it != scope.aliases.alias_name_to_lambda_node.end()) it->second = node; } } @@ -6588,7 +6666,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id if (is_duplicated_alias) scope.non_cached_identifier_lookups_during_expression_resolve.erase({Identifier{node_alias}, IdentifierLookupContext::EXPRESSION}); - resolved_expressions.emplace(node, result_projection_names); + if (!ignore_alias) + resolved_expressions.emplace(node, result_projection_names); scope.popExpressionNode(); bool expression_was_root = scope.expressions_in_resolve_process_stack.empty(); @@ -6953,8 +7032,8 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod resolved_identifier = resolved_identifier->clone(); /// Update alias name to table expression map - auto table_expression_it = scope.alias_name_to_table_expression_node.find(from_table_identifier_alias); - if (table_expression_it != scope.alias_name_to_table_expression_node.end()) + auto table_expression_it = scope.aliases.alias_name_to_table_expression_node.find(from_table_identifier_alias); + if (table_expression_it != scope.aliases.alias_name_to_table_expression_node.end()) table_expression_it->second = resolved_identifier; auto table_expression_modifiers = from_table_identifier.getTableExpressionModifiers(); @@ -7153,7 +7232,7 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table alias_column_resolve_scope.context = scope.context; /// Initialize aliases in alias column scope 
- QueryExpressionsAliasVisitor visitor(alias_column_resolve_scope); + QueryExpressionsAliasVisitor visitor(alias_column_resolve_scope.aliases); visitor.visit(alias_column_to_resolve->getExpression()); resolveExpressionNode(alias_column_resolve_scope.scope_node, @@ -7523,22 +7602,25 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif for (auto & array_join_expression : array_join_nodes) { auto array_join_expression_alias = array_join_expression->getAlias(); - if (!array_join_expression_alias.empty() && scope.alias_name_to_expression_node->contains(array_join_expression_alias)) - throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, - "ARRAY JOIN expression {} with duplicate alias {}. In scope {}", - array_join_expression->formatASTForErrorMessage(), - array_join_expression_alias, - scope.scope_node->formatASTForErrorMessage()); - /// Add array join expression into scope - expressions_visitor.visit(array_join_expression); + for (const auto & elem : array_join_nodes) + { + if (elem->hasAlias()) + scope.aliases.array_join_aliases.insert(elem->getAlias()); + + for (auto & child : elem->getChildren()) + { + if (child) + expressions_visitor.visit(child); + } + } std::string identifier_full_name; if (auto * identifier_node = array_join_expression->as()) identifier_full_name = identifier_node->getIdentifier().getFullName(); - resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/, true /*ignore_alias*/); auto process_array_join_expression = [&](QueryTreeNodePtr & expression) { @@ -7605,27 +7687,7 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif } } - /** Allow to resolve ARRAY JOIN columns from aliases with types after ARRAY JOIN only after ARRAY JOIN expression list is resolved, because - * during resolution of ARRAY JOIN expression list we must use column type before ARRAY JOIN. - * - * Example: SELECT id, value_element FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value - * It is expected that `value_element AS value` expression inside ARRAY JOIN expression list will be - * resolved as `value_element` expression with type before ARRAY JOIN. - * And it is expected that `value_element` inside projection expression list will be resolved as `value_element` expression - * with type after ARRAY JOIN. 
- */ array_join_nodes = std::move(array_join_column_expressions); - for (auto & array_join_column_expression : array_join_nodes) - { - auto it = scope.alias_name_to_expression_node->find(array_join_column_expression->getAlias()); - if (it != scope.alias_name_to_expression_node->end()) - { - auto & array_join_column_expression_typed = array_join_column_expression->as(); - auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), - array_join_column_expression_typed.getColumnSource()); - it->second = std::move(array_join_column); - } - } } void QueryAnalyzer::checkDuplicateTableNamesOrAlias(const QueryTreeNodePtr & join_node, QueryTreeNodePtr & left_table_expr, QueryTreeNodePtr & right_table_expr, IdentifierResolveScope & scope) @@ -7915,7 +7977,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, if (alias_name.empty()) return; - auto [it, inserted] = scope.alias_name_to_table_expression_node.emplace(alias_name, table_expression_node); + auto [it, inserted] = scope.aliases.alias_name_to_table_expression_node.emplace(alias_name, table_expression_node); if (!inserted) throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, "Duplicate aliases {} for table expressions in FROM section are not allowed. Try to register {}. Already registered {}.", @@ -7984,7 +8046,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of QUALIFY"); /// Initialize aliases in query node scope - QueryExpressionsAliasVisitor visitor(scope); + QueryExpressionsAliasVisitor visitor(scope.aliases); if (query_node_typed.hasWith()) visitor.visit(query_node_typed.getWithNode()); @@ -8102,7 +8164,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier table_expressions_visitor.visit(query_node_typed.getJoinTree()); initializeQueryJoinTreeNode(query_node_typed.getJoinTree(), scope); - scope.alias_name_to_table_expression_node.clear(); + scope.aliases.alias_name_to_table_expression_node.clear(); resolveQueryJoinTreeNode(query_node_typed.getJoinTree(), scope, visitor); } @@ -8152,10 +8214,10 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier /// Clone is needed cause aliases share subtrees. /// If not clone, the same (shared) subtree could be resolved again with different (Nullable) type /// See 03023_group_by_use_nulls_analyzer_crashes - for (auto & [key, node] : scope.alias_name_to_expression_node_before_group_by) - scope.alias_name_to_expression_node_after_group_by[key] = node->clone(); + for (auto & [key, node] : scope.aliases.alias_name_to_expression_node_before_group_by) + scope.aliases.alias_name_to_expression_node_after_group_by[key] = node->clone(); - scope.alias_name_to_expression_node = &scope.alias_name_to_expression_node_after_group_by; + scope.aliases.alias_name_to_expression_node = &scope.aliases.alias_name_to_expression_node_after_group_by; } if (query_node_typed.hasHaving()) @@ -8227,7 +8289,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier * After scope nodes are resolved, we can compare node with duplicate alias with * node from scope alias table. 
*/ - for (const auto & node_with_duplicated_alias : scope.cloned_nodes_with_duplicated_aliases) + for (const auto & node_with_duplicated_alias : scope.aliases.cloned_nodes_with_duplicated_aliases) { auto node = node_with_duplicated_alias; auto node_alias = node->getAlias(); @@ -8238,8 +8300,8 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier bool has_node_in_alias_table = false; - auto it = scope.alias_name_to_expression_node->find(node_alias); - if (it != scope.alias_name_to_expression_node->end()) + auto it = scope.aliases.alias_name_to_expression_node->find(node_alias); + if (it != scope.aliases.alias_name_to_expression_node->end()) { has_node_in_alias_table = true; @@ -8252,8 +8314,8 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier scope.scope_node->formatASTForErrorMessage()); } - it = scope.alias_name_to_lambda_node.find(node_alias); - if (it != scope.alias_name_to_lambda_node.end()) + it = scope.aliases.alias_name_to_lambda_node.find(node_alias); + if (it != scope.aliases.alias_name_to_lambda_node.end()) { has_node_in_alias_table = true; @@ -8298,10 +8360,10 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier /// Remove aliases from expression and lambda nodes - for (auto & [_, node] : *scope.alias_name_to_expression_node) + for (auto & [_, node] : *scope.aliases.alias_name_to_expression_node) node->removeAlias(); - for (auto & [_, node] : scope.alias_name_to_lambda_node) + for (auto & [_, node] : scope.aliases.alias_name_to_lambda_node) node->removeAlias(); query_node_typed.resolveProjectionColumns(std::move(projection_columns)); diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index 513dd0054d6..a82ad3dced1 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -108,7 +108,7 @@ public: } private: - static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types) + static void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types) { auto result_type = function_node.getResultType(); diff --git a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp index 917256bf4b1..5646d26f7f6 100644 --- a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp +++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp @@ -110,7 +110,7 @@ private: function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); } - static inline void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type) + static void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type) { AggregateFunctionProperties properties; const auto aggregate_function = AggregateFunctionFactory::instance().get(function_node.getFunctionName(), diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 1a4712aa697..852cbe75c4a 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -156,7 +156,7 @@ public: } private: - static inline void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type) + static void resolveAsCountIfAggregateFunction(FunctionNode & 
function_node, const DataTypePtr & argument_type) { AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get( @@ -165,7 +165,7 @@ private: function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } - inline QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right) + QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right) { auto multiply_function_node = std::make_shared("multiply"); auto & multiply_arguments_nodes = multiply_function_node->getArguments().getNodes(); diff --git a/src/Analyzer/ValidationUtils.cpp b/src/Analyzer/ValidationUtils.cpp index b7f9307e4b3..c142a0c7cc0 100644 --- a/src/Analyzer/ValidationUtils.cpp +++ b/src/Analyzer/ValidationUtils.cpp @@ -331,6 +331,9 @@ void validateAggregates(const QueryTreeNodePtr & query_node, AggregatesValidatio if (query_node_typed.hasOrderBy()) validate_group_by_columns_visitor.visit(query_node_typed.getOrderByNode()); + if (query_node_typed.hasInterpolate()) + validate_group_by_columns_visitor.visit(query_node_typed.getInterpolate()); + validate_group_by_columns_visitor.visit(query_node_typed.getProjectionNode()); } diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 331cace67d7..8c3c5327e94 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -30,7 +29,7 @@ namespace ErrorCodes } BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( - StorageAzureBlob::Configuration configuration_, + const StorageAzureConfiguration & configuration_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, @@ -39,15 +38,14 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false} , configuration(configuration_) { - auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); + auto client_ptr = configuration.createClient(/* is_readonly */false, /* attempt_to_create_container */true); client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); - object_storage = std::make_unique( - "BackupReaderAzureBlobStorage", - std::move(client_ptr), - StorageAzureBlob::createSettings(context_), - configuration.container, - configuration.getConnectionURL().toString()); + object_storage = std::make_unique("BackupReaderAzureBlobStorage", + std::move(client_ptr), + configuration.createSettings(context_), + configuration_.container, + configuration.getConnectionURL().toString()); client = object_storage->getAzureBlobStorageClient(); auto settings_copy = *object_storage->getSettings(); @@ -121,7 +119,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( - StorageAzureBlob::Configuration configuration_, + const StorageAzureConfiguration & configuration_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, @@ -131,13 +129,13 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, 
configuration_.getConnectionURL().toString(), false, false} , configuration(configuration_) { - auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container); + auto client_ptr = configuration.createClient(/* is_readonly */false, attempt_to_create_container); client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); object_storage = std::make_unique("BackupWriterAzureBlobStorage", std::move(client_ptr), - StorageAzureBlob::createSettings(context_), - configuration_.container, + configuration.createSettings(context_), + configuration.container, configuration_.getConnectionURL().toString()); client = object_storage->getAzureBlobStorageClient(); auto settings_copy = *object_storage->getSettings(); @@ -145,8 +143,13 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( settings = std::make_unique(settings_copy); } -void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, - bool copy_encrypted, UInt64 start_pos, UInt64 length) +void BackupWriterAzureBlobStorage::copyFileFromDisk( + const String & path_in_backup, + DiskPtr src_disk, + const String & src_path, + bool copy_encrypted, + UInt64 start_pos, + UInt64 length) { /// Use the native copy as a more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible. auto source_data_source_description = src_disk->getDataSourceDescription(); @@ -196,10 +199,16 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); } -void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) +void BackupWriterAzureBlobStorage::copyDataToFile( + const String & path_in_backup, + const CreateReadBufferFunction & create_read_buffer, + UInt64 start_pos, + UInt64 length) { - copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, fs::path(configuration.blob_path) / path_in_backup, settings, - threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); + copyDataToAzureBlobStorageFile( + create_read_buffer, start_pos, length, client, configuration.container, + fs::path(configuration.blob_path) / path_in_backup, settings, + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); } BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; @@ -217,7 +226,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) object_storage->listObjects(key,children,/*max_keys*/0); if (children.empty()) throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Object must exist"); - return children[0].metadata.size_bytes; + return children[0]->metadata->size_bytes; } std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/) diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 3a909ab684a..61688107839 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -5,8 +5,8 @@ #if USE_AZURE_BLOB_STORAGE #include #include -#include #include +#include namespace DB @@ -17,24 +17,30 @@ class BackupReaderAzureBlobStorage : public BackupReaderDefault { public: BackupReaderAzureBlobStorage( - 
StorageAzureBlob::Configuration configuration_, + const StorageAzureConfiguration & configuration_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + ~BackupReaderAzureBlobStorage() override; bool fileExists(const String & file_name) override; UInt64 getFileSize(const String & file_name) override; std::unique_ptr readFile(const String & file_name) override; - void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup, - DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override; + void copyFileToDisk( + const String & path_in_backup, + size_t file_size, + bool encrypted_in_backup, + DiskPtr destination_disk, + const String & destination_path, + WriteMode write_mode) override; private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureBlob::Configuration configuration; + StorageAzureConfiguration configuration; std::unique_ptr object_storage; std::shared_ptr settings; }; @@ -43,21 +49,32 @@ class BackupWriterAzureBlobStorage : public BackupWriterDefault { public: BackupWriterAzureBlobStorage( - StorageAzureBlob::Configuration configuration_, + const StorageAzureConfiguration & configuration_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container); + ~BackupWriterAzureBlobStorage() override; bool fileExists(const String & file_name) override; UInt64 getFileSize(const String & file_name) override; std::unique_ptr writeFile(const String & file_name) override; - void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override; - void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, - bool copy_encrypted, UInt64 start_pos, UInt64 length) override; + void copyDataToFile( + const String & path_in_backup, + const CreateReadBufferFunction & create_read_buffer, + UInt64 start_pos, + UInt64 length) override; + + void copyFileFromDisk( + const String & path_in_backup, + DiskPtr src_disk, + const String & src_path, + bool copy_encrypted, + UInt64 start_pos, + UInt64 length) override; void copyFile(const String & destination, const String & source, size_t size) override; @@ -67,9 +84,10 @@ public: private: std::unique_ptr readFile(const String & file_name, size_t expected_file_size) override; void removeFilesBatch(const Strings & file_names); + const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureBlob::Configuration configuration; + StorageAzureConfiguration configuration; std::unique_ptr object_storage; std::shared_ptr settings; }; diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 15860363615..92f086295a0 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -131,10 +131,10 @@ BackupReaderS3::BackupReaderS3( : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup)) + , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), 
context_->getUserName(), /*ignore_user=*/is_internal_backup).value_or(S3Settings{})) { auto & request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context_->getSettingsRef()); + request_settings.updateFromSettingsIfChanged(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint request_settings.allow_native_copy = allow_s3_native_copy; client = makeS3Client(s3_uri_, access_key_id_, secret_access_key_, s3_settings, context_); @@ -188,6 +188,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s fs::path(s3_uri.key) / path_in_backup, 0, file_size, + /* dest_s3_client= */ destination_disk->getS3StorageClient(), /* dest_bucket= */ blob_path[1], /* dest_key= */ blob_path[0], s3_settings.request_settings, @@ -222,10 +223,10 @@ BackupWriterS3::BackupWriterS3( : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup)) + , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup).value_or(S3Settings{})) { auto & request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context_->getSettingsRef()); + request_settings.updateFromSettingsIfChanged(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint request_settings.allow_native_copy = allow_s3_native_copy; request_settings.setStorageClassName(storage_class_name); @@ -252,18 +253,20 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src { LOG_TRACE(log, "Copying file {} from disk {} to S3", src_path, src_disk->getName()); copyS3File( - client, + src_disk->getS3StorageClient(), /* src_bucket */ blob_path[1], /* src_key= */ blob_path[0], start_pos, length, - s3_uri.bucket, - fs::path(s3_uri.key) / path_in_backup, + /* dest_s3_client= */ client, + /* dest_bucket= */ s3_uri.bucket, + /* dest_key= */ fs::path(s3_uri.key) / path_in_backup, s3_settings.request_settings, read_settings, blob_storage_log, {}, - threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWriterS3")); + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWriterS3"), + /*for_disk_s3=*/false); return; /// copied! 
} } @@ -281,8 +284,9 @@ void BackupWriterS3::copyFile(const String & destination, const String & source, /* src_key= */ fs::path(s3_uri.key) / source, 0, size, - s3_uri.bucket, - fs::path(s3_uri.key) / destination, + /* dest_s3_client= */ client, + /* dest_bucket= */ s3_uri.bucket, + /* dest_key= */ fs::path(s3_uri.key) / destination, s3_settings.request_settings, read_settings, blob_storage_log, diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 8b05965f472..81e3c104da1 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -5,11 +5,11 @@ #if USE_AZURE_BLOB_STORAGE #include -#include #include #include #include #include +#include #include #endif @@ -49,7 +49,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) const String & id_arg = params.backup_info.id_arg; const auto & args = params.backup_info.args; - StorageAzureBlob::Configuration configuration; + StorageAzureConfiguration configuration; if (!id_arg.empty()) { @@ -81,10 +81,11 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) } if (args.size() > 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]"); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]"); if (args.size() == 1) - configuration.blob_path = args[0].safeGet(); + configuration.setPath(args[0].safeGet()); } else @@ -116,12 +117,16 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) } BackupImpl::ArchiveParams archive_params; - if (hasRegisteredArchiveFileExtension(configuration.blob_path)) + if (hasRegisteredArchiveFileExtension(configuration.getPath())) { if (params.is_internal_backup) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled"); - archive_params.archive_name = removeFileNameFromURL(configuration.blob_path); + auto path = configuration.getPath(); + auto filename = removeFileNameFromURL(path); + configuration.setPath(path); + + archive_params.archive_name = filename; archive_params.compression_method = params.compression_method; archive_params.compression_level = params.compression_level; archive_params.password = params.password; diff --git a/src/BridgeHelper/CatBoostLibraryBridgeHelper.h b/src/BridgeHelper/CatBoostLibraryBridgeHelper.h index 55dfd715f00..5d5c6d01705 100644 --- a/src/BridgeHelper/CatBoostLibraryBridgeHelper.h +++ b/src/BridgeHelper/CatBoostLibraryBridgeHelper.h @@ -14,8 +14,8 @@ namespace DB class CatBoostLibraryBridgeHelper final : public LibraryBridgeHelper { public: - static constexpr inline auto PING_HANDLER = "/catboost_ping"; - static constexpr inline auto MAIN_HANDLER = "/catboost_request"; + static constexpr auto PING_HANDLER = "/catboost_ping"; + static constexpr auto MAIN_HANDLER = "/catboost_request"; explicit CatBoostLibraryBridgeHelper( ContextPtr context_, @@ -38,11 +38,11 @@ protected: bool bridgeHandShake() override; private: - static constexpr inline auto CATBOOST_LIST_METHOD = "catboost_list"; - static constexpr inline auto CATBOOST_REMOVEMODEL_METHOD = "catboost_removeModel"; - static constexpr inline auto CATBOOST_REMOVEALLMODELS_METHOD = "catboost_removeAllModels"; - static constexpr inline auto CATBOOST_GETTREECOUNT_METHOD = "catboost_GetTreeCount"; - static constexpr inline auto 
CATBOOST_LIB_EVALUATE_METHOD = "catboost_libEvaluate"; + static constexpr auto CATBOOST_LIST_METHOD = "catboost_list"; + static constexpr auto CATBOOST_REMOVEMODEL_METHOD = "catboost_removeModel"; + static constexpr auto CATBOOST_REMOVEALLMODELS_METHOD = "catboost_removeAllModels"; + static constexpr auto CATBOOST_GETTREECOUNT_METHOD = "catboost_GetTreeCount"; + static constexpr auto CATBOOST_LIB_EVALUATE_METHOD = "catboost_libEvaluate"; Poco::URI createRequestURI(const String & method) const; diff --git a/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.h b/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.h index 5632fd2a28e..63816aa63ef 100644 --- a/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.h +++ b/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.h @@ -25,8 +25,8 @@ public: String dict_attributes; }; - static constexpr inline auto PING_HANDLER = "/extdict_ping"; - static constexpr inline auto MAIN_HANDLER = "/extdict_request"; + static constexpr auto PING_HANDLER = "/extdict_ping"; + static constexpr auto MAIN_HANDLER = "/extdict_request"; ExternalDictionaryLibraryBridgeHelper(ContextPtr context_, const Block & sample_block, const Field & dictionary_id_, const LibraryInitData & library_data_); @@ -62,14 +62,14 @@ protected: ReadWriteBufferFromHTTP::OutStreamCallback getInitLibraryCallback() const; private: - static constexpr inline auto EXT_DICT_LIB_NEW_METHOD = "extDict_libNew"; - static constexpr inline auto EXT_DICT_LIB_CLONE_METHOD = "extDict_libClone"; - static constexpr inline auto EXT_DICT_LIB_DELETE_METHOD = "extDict_libDelete"; - static constexpr inline auto EXT_DICT_LOAD_ALL_METHOD = "extDict_loadAll"; - static constexpr inline auto EXT_DICT_LOAD_IDS_METHOD = "extDict_loadIds"; - static constexpr inline auto EXT_DICT_LOAD_KEYS_METHOD = "extDict_loadKeys"; - static constexpr inline auto EXT_DICT_IS_MODIFIED_METHOD = "extDict_isModified"; - static constexpr inline auto EXT_DICT_SUPPORTS_SELECTIVE_LOAD_METHOD = "extDict_supportsSelectiveLoad"; + static constexpr auto EXT_DICT_LIB_NEW_METHOD = "extDict_libNew"; + static constexpr auto EXT_DICT_LIB_CLONE_METHOD = "extDict_libClone"; + static constexpr auto EXT_DICT_LIB_DELETE_METHOD = "extDict_libDelete"; + static constexpr auto EXT_DICT_LOAD_ALL_METHOD = "extDict_loadAll"; + static constexpr auto EXT_DICT_LOAD_IDS_METHOD = "extDict_loadIds"; + static constexpr auto EXT_DICT_LOAD_KEYS_METHOD = "extDict_loadKeys"; + static constexpr auto EXT_DICT_IS_MODIFIED_METHOD = "extDict_isModified"; + static constexpr auto EXT_DICT_SUPPORTS_SELECTIVE_LOAD_METHOD = "extDict_supportsSelectiveLoad"; Poco::URI createRequestURI(const String & method) const; diff --git a/src/BridgeHelper/IBridgeHelper.h b/src/BridgeHelper/IBridgeHelper.h index 6812bd04a03..8ce1c0e143a 100644 --- a/src/BridgeHelper/IBridgeHelper.h +++ b/src/BridgeHelper/IBridgeHelper.h @@ -16,9 +16,9 @@ class IBridgeHelper: protected WithContext { public: - static constexpr inline auto DEFAULT_HOST = "127.0.0.1"; - static constexpr inline auto DEFAULT_FORMAT = "RowBinary"; - static constexpr inline auto PING_OK_ANSWER = "Ok."; + static constexpr auto DEFAULT_HOST = "127.0.0.1"; + static constexpr auto DEFAULT_FORMAT = "RowBinary"; + static constexpr auto PING_OK_ANSWER = "Ok."; static const inline std::string PING_METHOD = Poco::Net::HTTPRequest::HTTP_GET; static const inline std::string MAIN_METHOD = Poco::Net::HTTPRequest::HTTP_POST; diff --git a/src/BridgeHelper/LibraryBridgeHelper.h b/src/BridgeHelper/LibraryBridgeHelper.h index 
8940f9d1c9e..0c56fe7a221 100644 --- a/src/BridgeHelper/LibraryBridgeHelper.h +++ b/src/BridgeHelper/LibraryBridgeHelper.h @@ -37,7 +37,7 @@ protected: Poco::URI createBaseURI() const override; - static constexpr inline size_t DEFAULT_PORT = 9012; + static constexpr size_t DEFAULT_PORT = 9012; const Poco::Util::AbstractConfiguration & config; LoggerPtr log; diff --git a/src/BridgeHelper/XDBCBridgeHelper.h b/src/BridgeHelper/XDBCBridgeHelper.h index b557e12b85b..5f4c7fd8381 100644 --- a/src/BridgeHelper/XDBCBridgeHelper.h +++ b/src/BridgeHelper/XDBCBridgeHelper.h @@ -52,12 +52,12 @@ class XDBCBridgeHelper : public IXDBCBridgeHelper { public: - static constexpr inline auto DEFAULT_PORT = BridgeHelperMixin::DEFAULT_PORT; - static constexpr inline auto PING_HANDLER = "/ping"; - static constexpr inline auto MAIN_HANDLER = "/"; - static constexpr inline auto COL_INFO_HANDLER = "/columns_info"; - static constexpr inline auto IDENTIFIER_QUOTE_HANDLER = "/identifier_quote"; - static constexpr inline auto SCHEMA_ALLOWED_HANDLER = "/schema_allowed"; + static constexpr auto DEFAULT_PORT = BridgeHelperMixin::DEFAULT_PORT; + static constexpr auto PING_HANDLER = "/ping"; + static constexpr auto MAIN_HANDLER = "/"; + static constexpr auto COL_INFO_HANDLER = "/columns_info"; + static constexpr auto IDENTIFIER_QUOTE_HANDLER = "/identifier_quote"; + static constexpr auto SCHEMA_ALLOWED_HANDLER = "/schema_allowed"; XDBCBridgeHelper( ContextPtr context_, @@ -256,7 +256,7 @@ protected: struct JDBCBridgeMixin { - static constexpr inline auto DEFAULT_PORT = 9019; + static constexpr auto DEFAULT_PORT = 9019; static String configPrefix() { @@ -287,7 +287,7 @@ struct JDBCBridgeMixin struct ODBCBridgeMixin { - static constexpr inline auto DEFAULT_PORT = 9018; + static constexpr auto DEFAULT_PORT = 9018; static String configPrefix() { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4e8946facda..f2e10a27b75 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -115,8 +115,11 @@ if (TARGET ch_contrib::nats_io) add_headers_and_sources(dbms Storages/NATS) endif() -add_headers_and_sources(dbms Storages/DataLakes) -add_headers_and_sources(dbms Storages/DataLakes/Iceberg) +add_headers_and_sources(dbms Storages/ObjectStorage) +add_headers_and_sources(dbms Storages/ObjectStorage/Azure) +add_headers_and_sources(dbms Storages/ObjectStorage/S3) +add_headers_and_sources(dbms Storages/ObjectStorage/HDFS) +add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes) add_headers_and_sources(dbms Common/NamedCollections) if (TARGET ch_contrib::amqp_cpp) @@ -144,7 +147,6 @@ if (TARGET ch_contrib::azure_sdk) endif() if (TARGET ch_contrib::hdfs) - add_headers_and_sources(dbms Storages/HDFS) add_headers_and_sources(dbms Disks/ObjectStorages/HDFS) endif() diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index b6f821794f1..f8391c64d5a 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -643,6 +644,9 @@ try bool extras_into_stdout = need_render_progress || logs_into_stdout; bool select_only_into_file = select_into_file && !select_into_file_and_stdout; + if (!out_file_buf && default_output_compression_method != CompressionMethod::None) + out_file_buf = wrapWriteBufferWithCompressionMethod(out_buf, default_output_compression_method, 3, 0); + /// It is not clear how to write progress and logs /// intermixed with data with parallel formatting. /// It may increase code complexity significantly. 
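The ClientBase hunks above derive a default output compression method from the output file name (for example, a `.gz` suffix on the redirected stdout target) and later wrap the output buffer with that method. A minimal stand-alone sketch of the same suffix-to-method idea follows; the `pick_compression` helper is hypothetical and only illustrates the mapping, it is not the ClickHouse `chooseCompressionMethod` API.

    #include <iostream>
    #include <string>

    // Illustrative only: map a file-name suffix to a compression name,
    // mirroring the behaviour the client uses to pick a default output compression.
    static std::string pick_compression(const std::string & file_name)
    {
        auto ends_with = [&](const std::string & suffix)
        {
            return file_name.size() >= suffix.size()
                && file_name.compare(file_name.size() - suffix.size(), suffix.size(), suffix) == 0;
        };
        if (ends_with(".gz"))  return "gzip";
        if (ends_with(".zst")) return "zstd";
        if (ends_with(".xz"))  return "xz";
        return "none";
    }

    int main()
    {
        std::cout << pick_compression("result.tsv.gz") << '\n'; // prints "gzip"
        std::cout << pick_compression("result.tsv") << '\n';    // prints "none"
    }

When no suffix matches, the method stays "none" and the buffer is left unwrapped, which matches the `CompressionMethod::None` guard in the hunk above.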
@@ -735,7 +739,7 @@ bool ClientBase::isRegularFile(int fd) return fstat(fd, &file_stat) == 0 && S_ISREG(file_stat.st_mode); } -void ClientBase::setDefaultFormatsFromConfiguration() +void ClientBase::setDefaultFormatsAndCompressionFromConfiguration() { if (config().has("output-format")) { @@ -759,6 +763,10 @@ void ClientBase::setDefaultFormatsFromConfiguration() default_output_format = *format_from_file_name; else default_output_format = "TSV"; + + std::optional file_name = tryGetFileNameFromFileDescriptor(STDOUT_FILENO); + if (file_name) + default_output_compression_method = chooseCompressionMethod(*file_name, ""); } else if (is_interactive) { diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 64cbdbe8989..7a0489641c8 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -190,7 +190,7 @@ protected: /// Adjust some settings after command line options and config had been processed. void adjustSettings(); - void setDefaultFormatsFromConfiguration(); + void setDefaultFormatsAndCompressionFromConfiguration(); void initTTYBuffer(ProgressOption progress); @@ -224,6 +224,7 @@ protected: String pager; String default_output_format; /// Query results output format. + CompressionMethod default_output_compression_method = CompressionMethod::None; String default_input_format; /// Tables' format for clickhouse-local. bool select_into_file = false; /// If writing result INTO OUTFILE. It affects progress rendering. diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 8d5c246c48c..1e94240dd4c 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -1289,4 +1289,14 @@ size_t ColumnArray::getNumberOfDimensions() const return 1 + nested_array->getNumberOfDimensions(); /// Every modern C++ compiler optimizes tail recursion. 
} +void ColumnArray::takeDynamicStructureFromSourceColumns(const Columns & source_columns) +{ + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_source_columns.push_back(assert_cast(*source_column).getDataPtr()); + + data->takeDynamicStructureFromSourceColumns(nested_source_columns); +} + } diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 230d8830265..53eb5166df8 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -175,6 +175,9 @@ public: size_t getNumberOfDimensions() const; + bool hasDynamicStructure() const override { return getData().hasDynamicStructure(); } + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + private: WrappedPtr data; WrappedPtr offsets; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index 6763410b46d..934adf07cf4 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -122,6 +122,9 @@ public: UInt64 getNumberOfDefaultRows() const override { throwMustBeDecompressed(); } void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeDecompressed(); } + bool hasDynamicStructure() const override { throwMustBeDecompressed(); } + void takeDynamicStructureFromSourceColumns(const Columns &) override { throwMustBeDecompressed(); } + protected: size_t rows; size_t bytes; diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 4a3d40ca0d2..c2c0fa3027c 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -306,6 +306,8 @@ public: T getValue() const { return static_cast(getField().safeGet()); } bool isCollationSupported() const override { return data->isCollationSupported(); } + + bool hasDynamicStructure() const override { return data->hasDynamicStructure(); } }; ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value); diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index e0ea26744dc..e606aaaff0f 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -141,6 +141,14 @@ protected: UInt32 scale; }; +template +concept is_col_over_big_decimal = std::is_same_v> + && is_decimal && is_over_big_int; + +template +concept is_col_int_decimal = std::is_same_v> + && is_decimal && std::is_integral_v; + template class ColumnVector; template struct ColumnVectorOrDecimalT { using Col = ColumnVector; }; template struct ColumnVectorOrDecimalT { using Col = ColumnDecimal; }; diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp new file mode 100644 index 00000000000..3c147b6f123 --- /dev/null +++ b/src/Columns/ColumnDynamic.cpp @@ -0,0 +1,758 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int PARAMETER_OUT_OF_BOUND; +} + + +ColumnDynamic::ColumnDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) +{ + /// Create empty Variant. 
+ variant_info.variant_type = std::make_shared(DataTypes{}); + variant_info.variant_name = variant_info.variant_type->getName(); + variant_column = variant_info.variant_type->createColumn(); +} + +ColumnDynamic::ColumnDynamic( + MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_) + : variant_column(std::move(variant_column_)) + , variant_info(variant_info_) + , max_dynamic_types(max_dynamic_types_) + , statistics(statistics_) +{ +} + +ColumnDynamic::MutablePtr ColumnDynamic::create(MutableColumnPtr variant_column, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_) +{ + VariantInfo variant_info; + variant_info.variant_type = variant_type; + variant_info.variant_name = variant_type->getName(); + const auto & variants = assert_cast(*variant_type).getVariants(); + variant_info.variant_names.reserve(variants.size()); + variant_info.variant_name_to_discriminator.reserve(variants.size()); + for (ColumnVariant::Discriminator discr = 0; discr != variants.size(); ++discr) + { + const auto & variant_name = variant_info.variant_names.emplace_back(variants[discr]->getName()); + variant_info.variant_name_to_discriminator[variant_name] = discr; + } + + return create(std::move(variant_column), variant_info, max_dynamic_types_, statistics_); +} + +bool ColumnDynamic::addNewVariant(const DB::DataTypePtr & new_variant) +{ + /// Check if we already have such variant. + if (variant_info.variant_name_to_discriminator.contains(new_variant->getName())) + return true; + + /// Check if we reached maximum number of variants. + if (variant_info.variant_names.size() >= max_dynamic_types) + { + /// ColumnDynamic can have max_dynamic_types number of variants only when it has String as a variant. + /// Otherwise we won't be able to cast new variants to Strings. + if (!variant_info.variant_name_to_discriminator.contains("String")) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Maximum number of variants reached, but no String variant exists"); + + return false; + } + + /// If we have (max_dynamic_types - 1) number of variants and don't have String variant, we can add only String variant. 
+ if (variant_info.variant_names.size() == max_dynamic_types - 1 && new_variant->getName() != "String" && !variant_info.variant_name_to_discriminator.contains("String")) + return false; + + const DataTypes & current_variants = assert_cast(*variant_info.variant_type).getVariants(); + DataTypes all_variants = current_variants; + all_variants.push_back(new_variant); + auto new_variant_type = std::make_shared(all_variants); + updateVariantInfoAndExpandVariantColumn(new_variant_type); + return true; +} + +void ColumnDynamic::addStringVariant() +{ + if (!addNewVariant(std::make_shared())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add String variant to Dynamic column, it's a bug"); +} + +void ColumnDynamic::updateVariantInfoAndExpandVariantColumn(const DB::DataTypePtr & new_variant_type) +{ + const DataTypes & current_variants = assert_cast(variant_info.variant_type.get())->getVariants(); + const DataTypes & new_variants = assert_cast(new_variant_type.get())->getVariants(); + + Names new_variant_names; + new_variant_names.reserve(new_variants.size()); + std::unordered_map new_variant_name_to_discriminator; + new_variant_name_to_discriminator.reserve(new_variants.size()); + std::vector> new_variant_columns_and_discriminators_to_add; + new_variant_columns_and_discriminators_to_add.reserve(new_variants.size() - current_variants.size()); + std::vector current_to_new_discriminators; + current_to_new_discriminators.resize(current_variants.size()); + + for (ColumnVariant::Discriminator discr = 0; discr != new_variants.size(); ++discr) + { + const auto & name = new_variant_names.emplace_back(new_variants[discr]->getName()); + new_variant_name_to_discriminator[name] = discr; + + auto current_it = variant_info.variant_name_to_discriminator.find(name); + if (current_it == variant_info.variant_name_to_discriminator.end()) + new_variant_columns_and_discriminators_to_add.emplace_back(new_variants[discr]->createColumn(), discr); + else + current_to_new_discriminators[current_it->second] = discr; + } + + variant_info.variant_type = new_variant_type; + variant_info.variant_name = new_variant_type->getName(); + variant_info.variant_names = new_variant_names; + variant_info.variant_name_to_discriminator = new_variant_name_to_discriminator; + assert_cast(*variant_column).extend(current_to_new_discriminators, std::move(new_variant_columns_and_discriminators_to_add)); + /// Clear mappings cache because now with new Variant we will have new mappings. + variant_mappings_cache.clear(); +} + +std::vector * ColumnDynamic::combineVariants(const DB::ColumnDynamic::VariantInfo & other_variant_info) +{ + /// Check if we already have global discriminators mapping for other Variant in cache. + /// It's used to not calculate the same mapping each call of insertFrom with the same columns. + auto cache_it = variant_mappings_cache.find(other_variant_info.variant_name); + if (cache_it != variant_mappings_cache.end()) + return &cache_it->second; + + /// Check if we already tried to combine these variants but failed due to max_dynamic_types limit. 
+ if (variants_with_failed_combination.contains(other_variant_info.variant_name)) + return nullptr; + + const DataTypes & other_variants = assert_cast(*other_variant_info.variant_type).getVariants(); + + size_t num_new_variants = 0; + for (size_t i = 0; i != other_variants.size(); ++i) + { + if (!variant_info.variant_name_to_discriminator.contains(other_variant_info.variant_names[i])) + ++num_new_variants; + } + + /// If we have new variants we need to update current variant info and extend Variant column + if (num_new_variants) + { + const DataTypes & current_variants = assert_cast(*variant_info.variant_type).getVariants(); + + /// We cannot combine Variants if total number of variants exceeds max_dynamic_types. + if (current_variants.size() + num_new_variants > max_dynamic_types) + { + /// Remember that we cannot combine our variant with this one, so we will not try to do it again. + variants_with_failed_combination.insert(other_variant_info.variant_name); + return nullptr; + } + + /// We cannot combine Variants if total number of variants reaches max_dynamic_types and we don't have String variant. + if (current_variants.size() + num_new_variants == max_dynamic_types && !variant_info.variant_name_to_discriminator.contains("String") && !other_variant_info.variant_name_to_discriminator.contains("String")) + { + variants_with_failed_combination.insert(other_variant_info.variant_name); + return nullptr; + } + + DataTypes all_variants = current_variants; + all_variants.insert(all_variants.end(), other_variants.begin(), other_variants.end()); + auto new_variant_type = std::make_shared(all_variants); + updateVariantInfoAndExpandVariantColumn(new_variant_type); + } + + /// Create a global discriminators mapping for other variant. + std::vector other_to_new_discriminators; + other_to_new_discriminators.reserve(other_variants.size()); + for (size_t i = 0; i != other_variants.size(); ++i) + other_to_new_discriminators.push_back(variant_info.variant_name_to_discriminator[other_variant_info.variant_names[i]]); + + /// Save mapping to cache to not calculate it again for the same Variants. + auto [it, _] = variant_mappings_cache.emplace(other_variant_info.variant_name, std::move(other_to_new_discriminators)); + return &it->second; +} + +void ColumnDynamic::insert(const DB::Field & x) +{ + /// Check if we can insert field without Variant extension. + if (variant_column->tryInsert(x)) + return; + + /// If we cannot insert field into current variant column, extend it with new variant for this field from its type. + if (addNewVariant(applyVisitor(FieldToDataType(), x))) + { + /// Now we should be able to insert this field into extended variant column. + variant_column->insert(x); + } + else + { + /// We reached maximum number of variants and couldn't add new variant. + /// This case should be really rare in real use cases. + /// We should always be able to add String variant and cast inserted value to String. + addStringVariant(); + variant_column->insert(toString(x)); + } +} + +bool ColumnDynamic::tryInsert(const DB::Field & x) +{ + /// We can insert any value into Dynamic column. + insert(x); + return true; +} + + +void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) +{ + const auto & dynamic_src = assert_cast(src_); + + /// Check if we have the same variants in both columns. 
+ if (variant_info.variant_name == dynamic_src.variant_info.variant_name) + { + variant_column->insertFrom(*dynamic_src.variant_column, n); + return; + } + + auto & variant_col = assert_cast(*variant_column); + + /// If variants are different, we need to extend our variant with new variants. + if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + { + variant_col.insertFrom(*dynamic_src.variant_column, n, *global_discriminators_mapping); + return; + } + + /// We cannot combine 2 Variant types as total number of variants exceeds the limit. + /// We need to insert single value, try to add only corresponding variant. + const auto & src_variant_col = assert_cast(*dynamic_src.variant_column); + auto src_global_discr = src_variant_col.globalDiscriminatorAt(n); + + /// NULL doesn't require Variant extension. + if (src_global_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + insertDefault(); + return; + } + + auto variant_type = assert_cast(*dynamic_src.variant_info.variant_type).getVariants()[src_global_discr]; + if (addNewVariant(variant_type)) + { + auto discr = variant_info.variant_name_to_discriminator[dynamic_src.variant_info.variant_names[src_global_discr]]; + variant_col.insertIntoVariantFrom(discr, src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(n)); + return; + } + + /// We reached maximum number of variants and couldn't add new variant. + /// We should always be able to add String variant and cast inserted value to String. + addStringVariant(); + auto tmp_variant_column = src_variant_col.getVariantByGlobalDiscriminator(src_global_discr).cloneEmpty(); + tmp_variant_column->insertFrom(src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(n)); + auto tmp_string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); + auto string_variant_discr = variant_info.variant_name_to_discriminator["String"]; + variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0); +} + +void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) +{ + if (start + length > src_.size()) + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameter out of bound in ColumnDynamic::insertRangeFrom method. " + "[start({}) + length({}) > src.size()({})]", start, length, src_.size()); + + const auto & dynamic_src = assert_cast(src_); + + /// Check if we have the same variants in both columns. + if (variant_info.variant_names == dynamic_src.variant_info.variant_names) + { + variant_column->insertRangeFrom(*dynamic_src.variant_column, start, length); + return; + } + + auto & variant_col = assert_cast(*variant_column); + + /// If variants are different, we need to extend our variant with new variants. + if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + { + variant_col.insertRangeFrom(*dynamic_src.variant_column, start, length, *global_discriminators_mapping); + return; + } + + /// We cannot combine 2 Variant types as total number of variants exceeds the limit. + /// In this case we will add most frequent variants from this range and insert them as usual, + /// all other variants will be converted to String. 
+ /// TODO: instead of keeping all current variants and just adding new most frequent variants + /// from source columns we can also try to replace rarest existing variants with frequent + /// variants from source column (so we will avoid casting new frequent variants to String + /// and keeping rare existing ones). It will require rewriting of existing data in Variant + /// column but will improve usability of Dynamic column for example during squashing blocks + /// during insert. + + const auto & src_variant_column = dynamic_src.getVariantColumn(); + + /// Calculate ranges for each variant in current range. + std::vector> variants_ranges(dynamic_src.variant_info.variant_names.size(), {0, 0}); + /// If we insert the whole column, no need to iterate through the range, we can just take variant sizes. + if (start == 0 && length == dynamic_src.size()) + { + for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) + variants_ranges[i] = {0, src_variant_column.getVariantByGlobalDiscriminator(i).size()}; + } + /// Otherwise we need to iterate through discriminators and calculate the range for each variant. + else + { + const auto & local_discriminators = src_variant_column.getLocalDiscriminators(); + const auto & offsets = src_variant_column.getOffsets(); + size_t end = start + length; + for (size_t i = start; i != end; ++i) + { + auto discr = src_variant_column.globalDiscriminatorByLocal(local_discriminators[i]); + if (discr != ColumnVariant::NULL_DISCRIMINATOR) + { + if (!variants_ranges[discr].second) + variants_ranges[discr].first = offsets[i]; + ++variants_ranges[discr].second; + } + } + } + + const auto & src_variants = assert_cast(*dynamic_src.variant_info.variant_type).getVariants(); + /// List of variants that will be converted to String. + std::vector variants_to_convert_to_string; + /// Mapping from global discriminators of src_variant to the new variant we will create. + std::vector other_to_new_discriminators; + other_to_new_discriminators.reserve(dynamic_src.variant_info.variant_names.size()); + + /// Check if we cannot add any more new variants. In this case we will convert all new variants to String. + if (variant_info.variant_names.size() == max_dynamic_types || (variant_info.variant_names.size() == max_dynamic_types - 1 && !variant_info.variant_name_to_discriminator.contains("String"))) + { + addStringVariant(); + for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) + { + auto it = variant_info.variant_name_to_discriminator.find(dynamic_src.variant_info.variant_names[i]); + if (it == variant_info.variant_name_to_discriminator.end()) + { + variants_to_convert_to_string.push_back(i); + other_to_new_discriminators.push_back(variant_info.variant_name_to_discriminator["String"]); + } + else + { + other_to_new_discriminators.push_back(it->second); + } + } + } + /// We still can add some new variants, but not all of them. Let's choose the most frequent variants in specified range. 
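The per-variant `{offset, length}` pairs collected below can be reproduced with a small loop over discriminators and offsets. The standalone sketch uses global discriminators directly and plain vectors, so it skips the local-to-global translation that ColumnVariant performs; `variantRanges` and `NULL_DISCR` are illustrative names:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

constexpr uint8_t NULL_DISCR = 255;

/// For rows [start, start + length) compute, per variant, the offset of its first
/// referenced value and how many values are referenced. Assumes values of one variant
/// are stored contiguously in insertion order, as in ColumnVariant.
std::vector<std::pair<size_t, size_t>> variantRanges(
    const std::vector<uint8_t> & discriminators,
    const std::vector<size_t> & offsets,
    size_t num_variants, size_t start, size_t length)
{
    std::vector<std::pair<size_t, size_t>> ranges(num_variants, {0, 0});
    for (size_t i = start; i != start + length; ++i)
    {
        uint8_t d = discriminators[i];
        if (d == NULL_DISCR)
            continue;
        if (ranges[d].second == 0)      /// first row of this variant inside the range
            ranges[d].first = offsets[i];
        ++ranges[d].second;
    }
    return ranges;
}

int main()
{
    /// 5 rows: Int8, String, NULL, Int8, Int8; offsets point into each variant's own column.
    std::vector<uint8_t> discr   {0, 1, NULL_DISCR, 0, 0};
    std::vector<size_t>  offsets {0, 0, 0,          1, 2};
    auto ranges = variantRanges(discr, offsets, 2, /*start=*/1, /*length=*/4);
    std::cout << ranges[0].first << ' ' << ranges[0].second << '\n';   /// 1 2  (Int8 rows)
    std::cout << ranges[1].first << ' ' << ranges[1].second << '\n';   /// 0 1  (the String row)
}
```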
+ else + { + std::vector> new_variants_with_sizes; + new_variants_with_sizes.reserve(dynamic_src.variant_info.variant_names.size()); + for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) + { + const auto & variant_name = dynamic_src.variant_info.variant_names[i]; + if (variant_name != "String" && !variant_info.variant_name_to_discriminator.contains(variant_name)) + new_variants_with_sizes.emplace_back(variants_ranges[i].second, i); + } + + std::sort(new_variants_with_sizes.begin(), new_variants_with_sizes.end(), std::greater()); + DataTypes new_variants = assert_cast(*variant_info.variant_type).getVariants(); + if (!variant_info.variant_name_to_discriminator.contains("String")) + new_variants.push_back(std::make_shared()); + + for (const auto & [_, discr] : new_variants_with_sizes) + { + if (new_variants.size() != max_dynamic_types) + new_variants.push_back(src_variants[discr]); + else + variants_to_convert_to_string.push_back(discr); + } + + auto new_variant_type = std::make_shared(new_variants); + updateVariantInfoAndExpandVariantColumn(new_variant_type); + auto string_variant_discriminator = variant_info.variant_name_to_discriminator.at("String"); + for (const auto & variant_name : dynamic_src.variant_info.variant_names) + { + auto it = variant_info.variant_name_to_discriminator.find(variant_name); + if (it == variant_info.variant_name_to_discriminator.end()) + other_to_new_discriminators.push_back(string_variant_discriminator); + else + other_to_new_discriminators.push_back(it->second); + } + } + + /// Convert to String all variants that couldn't be added. + std::unordered_map variants_converted_to_string; + variants_converted_to_string.reserve(variants_to_convert_to_string.size()); + for (auto discr : variants_to_convert_to_string) + { + auto [variant_start, variant_length] = variants_ranges[discr]; + const auto & variant = src_variant_column.getVariantPtrByGlobalDiscriminator(discr); + if (variant_start == 0 && variant_length == variant->size()) + variants_converted_to_string[discr] = castColumn(ColumnWithTypeAndName(variant, src_variants[discr], ""), std::make_shared()); + else + variants_converted_to_string[discr] = castColumn(ColumnWithTypeAndName(variant->cut(variant_start, variant_length), src_variants[discr], ""), std::make_shared()); + } + + const auto & src_local_discriminators = src_variant_column.getLocalDiscriminators(); + const auto & src_offsets = src_variant_column.getOffsets(); + const auto & src_variant_columns = src_variant_column.getVariants(); + size_t end = start + length; + for (size_t i = start; i != end; ++i) + { + auto local_discr = src_local_discriminators[i]; + if (local_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + variant_col.insertDefault(); + } + else + { + auto global_discr = src_variant_column.globalDiscriminatorByLocal(local_discr); + auto to_global_discr = other_to_new_discriminators[global_discr]; + auto it = variants_converted_to_string.find(global_discr); + if (it == variants_converted_to_string.end()) + { + variant_col.insertIntoVariantFrom(to_global_discr, *src_variant_columns[local_discr], src_offsets[i]); + } + else + { + variant_col.insertIntoVariantFrom(to_global_discr, *it->second, src_offsets[i] - variants_ranges[global_discr].first); + } + } + } +} + +void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +{ + const auto & dynamic_src = assert_cast(src_); + + /// Check if we have the same variants in both columns. 
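The branch above is a top-K selection: existing variants and String are kept, candidate new variants are ordered by their size inside the inserted range, and whatever does not fit is routed to String. A compact standalone version of that ordering step; the type names, sizes and the limit of 4 are made up for the example:

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main()
{
    const size_t max_types = 4;

    /// Variants we already have plus String, which is always reserved as the fallback.
    std::vector<std::string> result = {"Int8", "String"};

    /// Candidate new variants from the source column with their sizes in the inserted range.
    std::vector<std::pair<size_t, std::string>> candidates =
        {{100, "Float64"}, {5, "Array(Int8)"}, {40, "UInt64"}};

    /// Larger ranges first, so the most frequent types keep their own variant.
    std::sort(candidates.begin(), candidates.end(), std::greater<>());

    std::vector<std::string> converted_to_string;
    for (const auto & [size, name] : candidates)
    {
        if (result.size() < max_types)
            result.push_back(name);
        else
            converted_to_string.push_back(name);   /// will be cast to String on insert
    }

    for (const auto & name : result)
        std::cout << name << ' ';                  /// Int8 String Float64 UInt64
    std::cout << "| to String:";
    for (const auto & name : converted_to_string)
        std::cout << ' ' << name;                  /// Array(Int8)
    std::cout << '\n';
}
```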
+ if (variant_info.variant_names == dynamic_src.variant_info.variant_names) + { + variant_column->insertManyFrom(*dynamic_src.variant_column, position, length); + return; + } + + auto & variant_col = assert_cast(*variant_column); + + /// If variants are different, we need to extend our variant with new variants. + if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + { + variant_col.insertManyFrom(*dynamic_src.variant_column, position, length, *global_discriminators_mapping); + return; + } + + /// We cannot combine 2 Variant types as total number of variants exceeds the limit. + /// We need to insert single value, try to add only corresponding variant. + const auto & src_variant_col = assert_cast(*dynamic_src.variant_column); + auto src_global_discr = src_variant_col.globalDiscriminatorAt(position); + if (src_global_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + insertDefault(); + return; + } + + auto variant_type = assert_cast(*dynamic_src.variant_info.variant_type).getVariants()[src_global_discr]; + if (addNewVariant(variant_type)) + { + auto discr = variant_info.variant_name_to_discriminator[dynamic_src.variant_info.variant_names[src_global_discr]]; + variant_col.insertManyIntoVariantFrom(discr, src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(position), length); + return; + } + + addStringVariant(); + auto tmp_variant_column = src_variant_col.getVariantByGlobalDiscriminator(src_global_discr).cloneEmpty(); + tmp_variant_column->insertFrom(src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(position)); + auto tmp_string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); + auto string_variant_discr = variant_info.variant_name_to_discriminator["String"]; + variant_col.insertManyIntoVariantFrom(string_variant_discr, *tmp_string_column, 0, length); +} + + +StringRef ColumnDynamic::serializeValueIntoArena(size_t n, DB::Arena & arena, const char *& begin) const +{ + /// We cannot use Variant serialization here as it serializes discriminator + value, + /// but Dynamic doesn't have fixed mapping discriminator <-> variant type + /// as different Dynamic column can have different Variants. + /// Instead, we serialize null bit + variant type name (size + bytes) + value. 
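The layout described in the comment above (null bit, then type-name size and bytes, then the value serialized by the variant itself) can be mimicked with a plain byte buffer. This is a standalone sketch only, not the Arena-based code: `serializeDynamicValue` is an invented helper, and the value is passed in as an already-serialized blob:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

/// Append size bytes starting at data to the buffer.
static void append(std::vector<char> & buf, const void * data, size_t size)
{
    const char * p = static_cast<const char *>(data);
    buf.insert(buf.end(), p, p + size);
}

/// Layout: null bit | type name size | type name bytes | value bytes.
std::vector<char> serializeDynamicValue(bool is_null, const std::string & type_name, const std::string & value_bytes)
{
    std::vector<char> buf;
    uint8_t null_bit = is_null ? 1 : 0;
    append(buf, &null_bit, sizeof(null_bit));
    if (is_null)
        return buf;                                  /// NULL rows store only the null bit
    size_t name_size = type_name.size();
    append(buf, &name_size, sizeof(name_size));
    append(buf, type_name.data(), type_name.size());
    append(buf, value_bytes.data(), value_bytes.size());
    return buf;
}

int main()
{
    auto row = serializeDynamicValue(false, "Int8", std::string(1, char(42)));
    std::cout << "serialized " << row.size() << " bytes\n";   /// 1 + 8 + 4 + 1 = 14 with 8-byte size_t
}
```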
+ const auto & variant_col = assert_cast(*variant_column); + auto discr = variant_col.globalDiscriminatorAt(n); + StringRef res; + UInt8 null_bit = discr == ColumnVariant::NULL_DISCRIMINATOR; + if (null_bit) + { + char * pos = arena.allocContinue(sizeof(UInt8), begin); + memcpy(pos, &null_bit, sizeof(UInt8)); + res.data = pos; + res.size = sizeof(UInt8); + return res; + } + + const auto & variant_name = variant_info.variant_names[discr]; + size_t variant_name_size = variant_name.size(); + char * pos = arena.allocContinue(sizeof(UInt8) + sizeof(size_t) + variant_name.size(), begin); + memcpy(pos, &null_bit, sizeof(UInt8)); + memcpy(pos + sizeof(UInt8), &variant_name_size, sizeof(size_t)); + memcpy(pos + sizeof(UInt8) + sizeof(size_t), variant_name.data(), variant_name.size()); + res.data = pos; + res.size = sizeof(UInt8) + sizeof(size_t) + variant_name.size(); + + auto value_ref = variant_col.getVariantByGlobalDiscriminator(discr).serializeValueIntoArena(variant_col.offsetAt(n), arena, begin); + res.data = value_ref.data - res.size; + res.size += value_ref.size; + return res; +} + +const char * ColumnDynamic::deserializeAndInsertFromArena(const char * pos) +{ + auto & variant_col = assert_cast(*variant_column); + UInt8 null_bit = unalignedLoad(pos); + pos += sizeof(UInt8); + if (null_bit) + { + insertDefault(); + return pos; + } + + /// Read variant type name. + const size_t variant_name_size = unalignedLoad(pos); + pos += sizeof(variant_name_size); + String variant_name; + variant_name.resize(variant_name_size); + memcpy(variant_name.data(), pos, variant_name_size); + pos += variant_name_size; + /// If we already have such variant, just deserialize it into corresponding variant column. + auto it = variant_info.variant_name_to_discriminator.find(variant_name); + if (it != variant_info.variant_name_to_discriminator.end()) + { + auto discr = it->second; + return variant_col.deserializeVariantAndInsertFromArena(discr, pos); + } + + /// If we don't have such variant, add it. + auto variant_type = DataTypeFactory::instance().get(variant_name); + if (likely(addNewVariant(variant_type))) + { + auto discr = variant_info.variant_name_to_discriminator[variant_name]; + return variant_col.deserializeVariantAndInsertFromArena(discr, pos); + } + + /// We reached maximum number of variants and couldn't add new variant. + /// We should always be able to add String variant and cast inserted value to String. + addStringVariant(); + /// Create temporary column of this variant type and deserialize value into it. + auto tmp_variant_column = variant_type->createColumn(); + pos = tmp_variant_column->deserializeAndInsertFromArena(pos); + /// Cast temporary column to String and insert this value into String variant. 
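Reading that layout back mirrors the deserialization shown above: first the null bit, then the type name, and only then the value, which is handed to the matching variant column, to a newly added variant, or (as in the branch above) cast to String. A standalone parser for the simplified layout from the previous sketch; `parseDynamicHeader` is an invented helper and the value handling is deliberately left out:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>

/// Parse the header: null bit, then type name size and bytes.
/// Returns the advanced pointer; the bytes that follow are the serialized value.
const char * parseDynamicHeader(const char * pos, bool & is_null, std::string & type_name)
{
    uint8_t null_bit;
    std::memcpy(&null_bit, pos, sizeof(null_bit));
    pos += sizeof(null_bit);
    is_null = null_bit != 0;
    if (is_null)
        return pos;                       /// nothing else is stored for NULL rows

    size_t name_size;
    std::memcpy(&name_size, pos, sizeof(name_size));
    pos += sizeof(name_size);
    type_name.assign(pos, name_size);
    pos += name_size;
    return pos;
}

int main()
{
    /// Buffer with the same layout as the writer sketch: a non-NULL Int8 row with value 42.
    char buf[1 + sizeof(size_t) + 4 + 1] = {};
    size_t name_size = 4;
    std::memcpy(buf + 1, &name_size, sizeof(name_size));
    std::memcpy(buf + 1 + sizeof(name_size), "Int8", 4);
    buf[1 + sizeof(size_t) + 4] = 42;

    bool is_null = false;
    std::string type_name;
    const char * value_pos = parseDynamicHeader(buf, is_null, type_name);
    std::cout << type_name << ", value byte " << int(*value_pos) << '\n';   /// Int8, value byte 42
}
```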
+ auto str_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); + variant_col.insertIntoVariantFrom(variant_info.variant_name_to_discriminator["String"], *str_column, 0); + return pos; +} + +const char * ColumnDynamic::skipSerializedInArena(const char * pos) const +{ + UInt8 null_bit = unalignedLoad(pos); + pos += sizeof(UInt8); + if (null_bit) + return pos; + + const size_t variant_name_size = unalignedLoad(pos); + pos += sizeof(variant_name_size); + String variant_name; + variant_name.resize(variant_name_size); + memcpy(variant_name.data(), pos, variant_name_size); + pos += variant_name_size; + auto tmp_variant_column = DataTypeFactory::instance().get(variant_name)->createColumn(); + return tmp_variant_column->skipSerializedInArena(pos); +} + +void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const +{ + const auto & variant_col = assert_cast(*variant_column); + auto discr = variant_col.globalDiscriminatorAt(n); + if (discr == ColumnVariant::NULL_DISCRIMINATOR) + { + hash.update(discr); + return; + } + + hash.update(variant_info.variant_names[discr]); + variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash); +} + +int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +{ + const auto & left_variant = assert_cast(*variant_column); + const auto & right_dynamic = assert_cast(rhs); + const auto & right_variant = assert_cast(*right_dynamic.variant_column); + + auto left_discr = left_variant.globalDiscriminatorAt(n); + auto right_discr = right_variant.globalDiscriminatorAt(m); + + /// Check if we have NULLs and return result based on nan_direction_hint. + if (left_discr == ColumnVariant::NULL_DISCRIMINATOR && right_discr == ColumnVariant::NULL_DISCRIMINATOR) + return 0; + else if (left_discr == ColumnVariant::NULL_DISCRIMINATOR) + return nan_direction_hint; + else if (right_discr == ColumnVariant::NULL_DISCRIMINATOR) + return -nan_direction_hint; + + /// If rows have different types, we compare type names. + if (variant_info.variant_names[left_discr] != right_dynamic.variant_info.variant_names[right_discr]) + return variant_info.variant_names[left_discr] < right_dynamic.variant_info.variant_names[right_discr] ? -1 : 1; + + /// If rows have the same types, compare actual values from corresponding variants. + return left_variant.getVariantByGlobalDiscriminator(left_discr).compareAt(left_variant.offsetAt(n), right_variant.offsetAt(m), right_variant.getVariantByGlobalDiscriminator(right_discr), nan_direction_hint); +} + +ColumnPtr ColumnDynamic::compress() const +{ + ColumnPtr variant_compressed = variant_column->compress(); + size_t byte_size = variant_compressed->byteSize(); + return ColumnCompressed::create(size(), byte_size, + [my_variant_compressed = std::move(variant_compressed), my_variant_info = variant_info, my_max_dynamic_types = max_dynamic_types, my_statistics = statistics]() mutable + { + return ColumnDynamic::create(my_variant_compressed->decompress(), my_variant_info, my_max_dynamic_types, my_statistics); + }); +} + +void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source_columns) +{ + if (!empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "takeDynamicStructureFromSourceColumns should be called only on empty Dynamic column"); + + /// During serialization of Dynamic column in MergeTree all Dynamic columns + /// in single part must have the same structure (the same variants). 
During merge + /// resulting column is constructed by inserting from source columns, + /// but it may happen that resulting column doesn't have rows from all source parts + /// but only from subset of them, and as a result some variants could be missing + /// and structures of resulting column may differ. + /// To solve this problem, before merge we create empty resulting column and use this method + /// to take dynamic structure from all source column even if we won't insert + /// rows from some of them. + + /// We want to construct resulting variant with most frequent variants from source columns and convert the rarest + /// variants to single String variant if we exceed the limit of variants. + /// First, collect all variants from all source columns and calculate total sizes. + std::unordered_map total_sizes; + DataTypes all_variants; + + for (const auto & source_column : source_columns) + { + const auto & source_dynamic = assert_cast(*source_column); + const auto & source_variant_column = source_dynamic.getVariantColumn(); + const auto & source_variant_info = source_dynamic.getVariantInfo(); + const auto & source_variants = assert_cast(*source_variant_info.variant_type).getVariants(); + /// During deserialization from MergeTree we will have variant sizes statistics from the whole data part. + const auto & source_statistics = source_dynamic.getStatistics(); + for (size_t i = 0; i != source_variants.size(); ++i) + { + const auto & variant_name = source_variant_info.variant_names[i]; + auto it = total_sizes.find(variant_name); + /// Add this variant to the list of all variants if we didn't see it yet. + if (it == total_sizes.end()) + { + all_variants.push_back(source_variants[i]); + it = total_sizes.emplace(variant_name, 0).first; + } + auto statistics_it = source_statistics.data.find(variant_name); + size_t size = statistics_it == source_statistics.data.end() ? source_variant_column.getVariantByGlobalDiscriminator(i).size() : statistics_it->second; + it->second += size; + } + } + + DataTypePtr result_variant_type; + /// Check if the number of all variants exceeds the limit. + if (all_variants.size() > max_dynamic_types || (all_variants.size() == max_dynamic_types && !total_sizes.contains("String"))) + { + /// Create list of variants with their sizes and sort it. + std::vector> variants_with_sizes; + variants_with_sizes.reserve(all_variants.size()); + for (const auto & variant : all_variants) + variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant); + std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater()); + + /// Take first max_dynamic_types variants from sorted list. + DataTypes result_variants; + result_variants.reserve(max_dynamic_types); + /// Add String variant in advance. + result_variants.push_back(std::make_shared()); + for (const auto & [_, variant] : variants_with_sizes) + { + if (result_variants.size() == max_dynamic_types) + break; + + if (variant->getName() != "String") + result_variants.push_back(variant); + } + + result_variant_type = std::make_shared(result_variants); + } + else + { + result_variant_type = std::make_shared(all_variants); + } + + /// Now we have resulting Variant and can fill variant info. 
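Stripped of the ClickHouse types, the structure unification above is a per-name size aggregation over all source parts followed by the same keep-the-largest selection with String reserved. A condensed standalone model; the part statistics, names and the limit of 3 are made up for the example:

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

int main()
{
    const size_t max_types = 3;

    /// Per-part statistics: variant name -> total size of that variant in the part
    /// (in the real code, the in-memory variant size is used when no statistic exists).
    std::vector<std::unordered_map<std::string, size_t>> source_stats =
    {
        {{"Int8", 1000}, {"Float64", 10}},
        {{"Int8", 500}, {"String", 200}, {"Array(Int8)", 3}},
    };

    /// 1. Sum sizes of each variant over all source columns.
    std::unordered_map<std::string, size_t> total_sizes;
    for (const auto & stats : source_stats)
        for (const auto & [name, size] : stats)
            total_sizes[name] += size;

    /// 2. Too many variants for the limit: keep the biggest ones and reserve String.
    std::vector<std::pair<size_t, std::string>> with_sizes;
    with_sizes.reserve(total_sizes.size());
    for (const auto & [name, size] : total_sizes)
        with_sizes.emplace_back(size, name);
    std::sort(with_sizes.begin(), with_sizes.end(), std::greater<>());

    std::vector<std::string> result = {"String"};   /// fallback variant is added in advance
    for (const auto & [size, name] : with_sizes)
    {
        if (result.size() == max_types)
            break;
        if (name != "String")
            result.push_back(name);
    }

    for (const auto & name : result)
        std::cout << name << ' ';                   /// String Int8 Float64
    std::cout << '\n';
}
```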
+ variant_info.variant_type = result_variant_type; + variant_info.variant_name = result_variant_type->getName(); + const auto & result_variants = assert_cast(*result_variant_type).getVariants(); + variant_info.variant_names.clear(); + variant_info.variant_names.reserve(result_variants.size()); + variant_info.variant_name_to_discriminator.clear(); + variant_info.variant_name_to_discriminator.reserve(result_variants.size()); + statistics.data.clear(); + statistics.data.reserve(result_variants.size()); + statistics.source = Statistics::Source::MERGE; + for (size_t i = 0; i != result_variants.size(); ++i) + { + auto variant_name = result_variants[i]->getName(); + variant_info.variant_names.push_back(variant_name); + variant_info.variant_name_to_discriminator[variant_name] = i; + statistics.data[variant_name] = total_sizes[variant_name]; + } + + variant_column = variant_info.variant_type->createColumn(); + + /// Now we have the resulting Variant that will be used in all merged columns. + /// Variants can also contain Dynamic columns inside, we should collect + /// all source variants that will be used in the resulting merged column + /// and call takeDynamicStructureFromSourceColumns on all resulting variants. + std::vector variants_source_columns; + variants_source_columns.resize(variant_info.variant_names.size()); + for (const auto & source_column : source_columns) + { + const auto & source_dynamic_column = assert_cast(*source_column); + const auto & source_variant_info = source_dynamic_column.getVariantInfo(); + for (size_t i = 0; i != variant_info.variant_names.size(); ++i) + { + /// Try to find this variant in current source column. + auto it = source_variant_info.variant_name_to_discriminator.find(variant_info.variant_names[i]); + if (it != source_variant_info.variant_name_to_discriminator.end()) + variants_source_columns[i].push_back(source_dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(it->second)); + } + } + + auto & variant_col = getVariantColumn(); + for (size_t i = 0; i != variant_info.variant_names.size(); ++i) + variant_col.getVariantByGlobalDiscriminator(i).takeDynamicStructureFromSourceColumns(variants_source_columns[i]); +} + +void ColumnDynamic::applyNullMap(const ColumnVector::Container & null_map) +{ + assert_cast(*variant_column).applyNullMap(null_map); +} + +void ColumnDynamic::applyNegatedNullMap(const ColumnVector::Container & null_map) +{ + assert_cast(*variant_column).applyNegatedNullMap(null_map); +} + +} diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h new file mode 100644 index 00000000000..27ad0dd583f --- /dev/null +++ b/src/Columns/ColumnDynamic.h @@ -0,0 +1,365 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +/** + * Column for storing Dynamic type values. + * Dynamic column allows to insert and store values of any data types inside. + * Inside it stores: + * - Variant column with all inserted values of different types. + * - Information about currently stored variants. + * + * When new values are inserted into Dynamic column, the internal Variant + * type and column are extended if the inserted value has new type. + */ +class ColumnDynamic final : public COWHelper, ColumnDynamic> +{ +public: + /// + struct Statistics + { + enum class Source + { + READ, /// Statistics were loaded into column during reading from MergeTree. + MERGE, /// Statistics were calculated during merge of several MergeTree parts. + }; + + /// Source of the statistics. 
+ Source source; + /// Statistics data: (variant name) -> (total variant size in data part). + std::unordered_map data; + }; + +private: + friend class COWHelper, ColumnDynamic>; + + struct VariantInfo + { + DataTypePtr variant_type; + /// Name of the whole variant to not call getName() every time. + String variant_name; + /// Names of variants to not call getName() every time on variants. + Names variant_names; + /// Mapping (variant name) -> (global discriminator). + /// It's used during variant extension. + std::unordered_map variant_name_to_discriminator; + }; + + explicit ColumnDynamic(size_t max_dynamic_types_); + ColumnDynamic(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {}); + +public: + /** Create immutable column using immutable arguments. This arguments may be shared with other columns. + * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. + */ + using Base = COWHelper, ColumnDynamic>; + static Ptr create(const ColumnPtr & variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {}) + { + return ColumnDynamic::create(variant_column_->assumeMutable(), variant_info_, max_dynamic_types_, statistics_); + } + + static MutablePtr create(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {}) + { + return Base::create(std::move(variant_column_), variant_info_, max_dynamic_types_, statistics_); + } + + static MutablePtr create(MutableColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_ = {}); + + static ColumnPtr create(ColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_ = {}) + { + return create(variant_column_->assumeMutable(), variant_type, max_dynamic_types_, statistics_); + } + + static MutablePtr create(size_t max_dynamic_types_) + { + return Base::create(max_dynamic_types_); + } + + std::string getName() const override { return "Dynamic(max_types=" + std::to_string(max_dynamic_types) + ")"; } + + const char * getFamilyName() const override + { + return "Dynamic"; + } + + TypeIndex getDataType() const override + { + return TypeIndex::Dynamic; + } + + MutableColumnPtr cloneEmpty() const override + { + /// Keep current dynamic structure + return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types, statistics); + } + + MutableColumnPtr cloneResized(size_t size) const override + { + return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types, statistics); + } + + size_t size() const override + { + return variant_column->size(); + } + + Field operator[](size_t n) const override + { + return (*variant_column)[n]; + } + + void get(size_t n, Field & res) const override + { + variant_column->get(n, res); + } + + bool isDefaultAt(size_t n) const override + { + return variant_column->isDefaultAt(n); + } + + bool isNullAt(size_t n) const override + { + return variant_column->isNullAt(n); + } + + StringRef getDataAt(size_t n) const override + { + return variant_column->getDataAt(n); + } + + void insertData(const char * pos, size_t length) override + { + variant_column->insertData(pos, length); + } + + void insert(const Field & x) override; + bool tryInsert(const Field & x) override; + void insertFrom(const IColumn & src_, size_t n) override; + void 
insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; + + void insertDefault() override + { + variant_column->insertDefault(); + } + + void insertManyDefaults(size_t length) override + { + variant_column->insertManyDefaults(length); + } + + void popBack(size_t n) override + { + variant_column->popBack(n); + } + + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; + const char * deserializeAndInsertFromArena(const char * pos) override; + const char * skipSerializedInArena(const char * pos) const override; + + void updateHashWithValue(size_t n, SipHash & hash) const override; + + void updateWeakHash32(WeakHash32 & hash) const override + { + variant_column->updateWeakHash32(hash); + } + + void updateHashFast(SipHash & hash) const override + { + variant_column->updateHashFast(hash); + } + + ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override + { + return create(variant_column->filter(filt, result_size_hint), variant_info, max_dynamic_types); + } + + void expand(const Filter & mask, bool inverted) override + { + variant_column->expand(mask, inverted); + } + + ColumnPtr permute(const Permutation & perm, size_t limit) const override + { + return create(variant_column->permute(perm, limit), variant_info, max_dynamic_types); + } + + ColumnPtr index(const IColumn & indexes, size_t limit) const override + { + return create(variant_column->index(indexes, limit), variant_info, max_dynamic_types); + } + + ColumnPtr replicate(const Offsets & replicate_offsets) const override + { + return create(variant_column->replicate(replicate_offsets), variant_info, max_dynamic_types); + } + + MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override + { + MutableColumns scattered_variant_columns = variant_column->scatter(num_columns, selector); + MutableColumns scattered_columns; + scattered_columns.reserve(num_columns); + for (auto & scattered_variant_column : scattered_variant_columns) + scattered_columns.emplace_back(create(std::move(scattered_variant_column), variant_info, max_dynamic_types)); + + return scattered_columns; + } + + int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; + + bool hasEqualValues() const override + { + return variant_column->hasEqualValues(); + } + + void getExtremes(Field & min, Field & max) const override + { + variant_column->getExtremes(min, max); + } + + void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, + size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override + { + variant_column->getPermutation(direction, stability, limit, nan_direction_hint, res); + } + + void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, + size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override + { + variant_column->updatePermutation(direction, stability, limit, nan_direction_hint, res, equal_ranges); + } + + void reserve(size_t n) override + { + variant_column->reserve(n); + } + + void ensureOwnership() override + { + variant_column->ensureOwnership(); + } + + size_t byteSize() const override + { + return variant_column->byteSize(); + } + + size_t byteSizeAt(size_t n) const override + { + return variant_column->byteSizeAt(n); + } + + size_t allocatedBytes() const 
override + { + return variant_column->allocatedBytes(); + } + + void protect() override + { + variant_column->protect(); + } + + void forEachSubcolumn(MutableColumnCallback callback) override + { + callback(variant_column); + } + + void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override + { + callback(*variant_column); + variant_column->forEachSubcolumnRecursively(callback); + } + + bool structureEquals(const IColumn & rhs) const override + { + if (const auto * rhs_concrete = typeid_cast(&rhs)) + return max_dynamic_types == rhs_concrete->max_dynamic_types; + return false; + } + + ColumnPtr compress() const override; + + double getRatioOfDefaultRows(double sample_ratio) const override + { + return variant_column->getRatioOfDefaultRows(sample_ratio); + } + + UInt64 getNumberOfDefaultRows() const override + { + return variant_column->getNumberOfDefaultRows(); + } + + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override + { + variant_column->getIndicesOfNonDefaultRows(indices, from, limit); + } + + void finalize() override + { + variant_column->finalize(); + } + + bool isFinalized() const override + { + return variant_column->isFinalized(); + } + + /// Apply null map to a nested Variant column. + void applyNullMap(const ColumnVector::Container & null_map); + void applyNegatedNullMap(const ColumnVector::Container & null_map); + + const VariantInfo & getVariantInfo() const { return variant_info; } + + const ColumnPtr & getVariantColumnPtr() const { return variant_column; } + ColumnPtr & getVariantColumnPtr() { return variant_column; } + + const ColumnVariant & getVariantColumn() const { return assert_cast(*variant_column); } + ColumnVariant & getVariantColumn() { return assert_cast(*variant_column); } + + bool addNewVariant(const DataTypePtr & new_variant); + void addStringVariant(); + + bool hasDynamicStructure() const override { return true; } + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + + const Statistics & getStatistics() const { return statistics; } + + size_t getMaxDynamicTypes() const { return max_dynamic_types; } + +private: + /// Combine current variant with the other variant and return global discriminators mapping + /// from other variant to the combined one. It's used for inserting from + /// different variants. + /// Returns nullptr if maximum number of variants is reached and the new variant cannot be created. + std::vector * combineVariants(const VariantInfo & other_variant_info); + + void updateVariantInfoAndExpandVariantColumn(const DataTypePtr & new_variant_type); + + WrappedPtr variant_column; + /// Store the type of current variant with some additional information. + VariantInfo variant_info; + /// The maximum number of different types that can be stored in this Dynamic column. + /// If exceeded, all new variants will be converted to String. + size_t max_dynamic_types; + + /// Size statistics of each variants from MergeTree data part. + /// Used in takeDynamicStructureFromSourceColumns and set during deserialization. + Statistics statistics; + + /// Cache (Variant name) -> (global discriminators mapping from this variant to current variant in Dynamic column). + /// Used to avoid mappings recalculation in combineVariants for the same Variant types. + std::unordered_map> variant_mappings_cache; + /// Cache of Variant types that couldn't be combined with current variant in Dynamic column. 
+ /// Used to avoid checking if combination is possible for the same Variant types. + std::unordered_set variants_with_failed_combination; +}; + +} diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 57e8ba685b4..eecea1a273f 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -312,4 +312,13 @@ ColumnPtr ColumnMap::compress() const }); } +void ColumnMap::takeDynamicStructureFromSourceColumns(const Columns & source_columns) +{ + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_source_columns.push_back(assert_cast(*source_column).getNestedColumnPtr()); + nested->takeDynamicStructureFromSourceColumns(nested_source_columns); +} + } diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 60aa69e7bf6..52165d0d74e 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -104,6 +104,9 @@ public: ColumnTuple & getNestedData() { return assert_cast(getNestedColumn().getData()); } ColumnPtr compress() const override; + + bool hasDynamicStructure() const override { return nested->hasDynamicStructure(); } + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; }; } diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 30e62548ad6..dd9387d96b1 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -868,6 +868,15 @@ ColumnPtr ColumnNullable::getNestedColumnWithDefaultOnNull() const return res; } +void ColumnNullable::takeDynamicStructureFromSourceColumns(const Columns & source_columns) +{ + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_source_columns.push_back(assert_cast(*source_column).getNestedColumnPtr()); + nested_column->takeDynamicStructureFromSourceColumns(nested_source_columns); +} + ColumnPtr makeNullable(const ColumnPtr & column) { if (isColumnNullable(*column)) @@ -924,4 +933,23 @@ ColumnPtr makeNullableOrLowCardinalityNullableSafe(const ColumnPtr & column) return column; } +ColumnPtr removeNullable(const ColumnPtr & column) +{ + if (const auto * column_nullable = typeid_cast(column.get())) + return column_nullable->getNestedColumnPtr(); + return column; +} + +ColumnPtr removeNullableOrLowCardinalityNullable(const ColumnPtr & column) +{ + if (const auto * column_low_cardinality = typeid_cast(column.get())) + { + if (!column_low_cardinality->nestedIsNullable()) + return column; + return column_low_cardinality->cloneWithDefaultOnNull(); + } + + return removeNullable(column); +} + } diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index c7ebb6ed7b6..266c188db25 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -190,6 +190,9 @@ public: /// Check that size of null map equals to size of nested column. 
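ColumnMap and ColumnNullable above (and ColumnSparse below) all follow the same pattern: a wrapper column reports hasDynamicStructure of its nested column and, before a merge, unwraps every source column and recurses. A bare-bones standalone model of that delegation; `MiniColumn`, `MiniDynamic` and `MiniNullable` are invented classes, not the real hierarchy:

```cpp
#include <iostream>
#include <memory>
#include <vector>

struct MiniColumn
{
    virtual ~MiniColumn() = default;
    /// Defaults mirror IColumn: most columns have no dynamic subcolumns.
    virtual bool hasDynamicStructure() const { return false; }
    virtual void takeDynamicStructureFromSourceColumns(const std::vector<std::shared_ptr<MiniColumn>> &) {}
};

struct MiniDynamic : MiniColumn
{
    bool hasDynamicStructure() const override { return true; }
    void takeDynamicStructureFromSourceColumns(const std::vector<std::shared_ptr<MiniColumn>> & sources) override
    {
        std::cout << "unifying structure of " << sources.size() << " Dynamic columns\n";
    }
};

/// Wrapper (think Nullable/Map/Sparse): delegates everything to the nested column.
struct MiniNullable : MiniColumn
{
    std::shared_ptr<MiniColumn> nested;
    explicit MiniNullable(std::shared_ptr<MiniColumn> nested_) : nested(std::move(nested_)) {}

    bool hasDynamicStructure() const override { return nested->hasDynamicStructure(); }
    void takeDynamicStructureFromSourceColumns(const std::vector<std::shared_ptr<MiniColumn>> & sources) override
    {
        std::vector<std::shared_ptr<MiniColumn>> nested_sources;
        nested_sources.reserve(sources.size());
        for (const auto & source : sources)
            nested_sources.push_back(std::static_pointer_cast<MiniNullable>(source)->nested);
        nested->takeDynamicStructureFromSourceColumns(nested_sources);
    }
};

int main()
{
    auto a = std::make_shared<MiniNullable>(std::make_shared<MiniDynamic>());
    auto b = std::make_shared<MiniNullable>(std::make_shared<MiniDynamic>());
    MiniNullable result(std::make_shared<MiniDynamic>());
    result.takeDynamicStructureFromSourceColumns({a, b});   /// unifying structure of 2 Dynamic columns
}
```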
void checkConsistency() const; + bool hasDynamicStructure() const override { return nested_column->hasDynamicStructure(); } + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + private: WrappedPtr nested_column; WrappedPtr null_map; @@ -211,4 +214,7 @@ ColumnPtr makeNullableSafe(const ColumnPtr & column); ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column); ColumnPtr makeNullableOrLowCardinalityNullableSafe(const ColumnPtr & column); +ColumnPtr removeNullable(const ColumnPtr & column); +ColumnPtr removeNullableOrLowCardinalityNullable(const ColumnPtr & column); + } diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 3a63d2bffc5..2e75a2fd4ab 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -801,6 +800,15 @@ ColumnSparse::Iterator ColumnSparse::getIterator(size_t n) const return Iterator(offsets_data, _size, current_offset, n); } +void ColumnSparse::takeDynamicStructureFromSourceColumns(const Columns & source_columns) +{ + Columns values_source_columns; + values_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + values_source_columns.push_back(assert_cast(*source_column).getValuesPtr()); + values->takeDynamicStructureFromSourceColumns(values_source_columns); +} + ColumnPtr recursiveRemoveSparse(const ColumnPtr & column) { if (!column) diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index c1bd614102c..7d3200da35f 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -148,6 +148,9 @@ public: size_t sizeOfValueIfFixed() const override { return values->sizeOfValueIfFixed() + values->sizeOfValueIfFixed(); } bool isCollationSupported() const override { return values->isCollationSupported(); } + bool hasDynamicStructure() const override { return values->hasDynamicStructure(); } + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + size_t getNumberOfTrailingDefaults() const { return offsets->empty() ? 
_size : _size - getOffsetsData().back() - 1; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 2393fcf92fd..31734edced4 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -572,6 +572,34 @@ bool ColumnTuple::isCollationSupported() const return false; } +bool ColumnTuple::hasDynamicStructure() const +{ + for (const auto & column : columns) + { + if (column->hasDynamicStructure()) + return true; + } + return false; +} + +void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_columns) +{ + std::vector nested_source_columns; + nested_source_columns.resize(columns.size()); + for (size_t i = 0; i != columns.size(); ++i) + nested_source_columns[i].reserve(source_columns.size()); + + for (const auto & source_column : source_columns) + { + const auto & nsource_columns = assert_cast(*source_column).getColumns(); + for (size_t i = 0; i != nsource_columns.size(); ++i) + nested_source_columns[i].push_back(nsource_columns[i]); + } + + for (size_t i = 0; i != columns.size(); ++i) + columns[i]->takeDynamicStructureFromSourceColumns(nested_source_columns[i]); +} + ColumnPtr ColumnTuple::compress() const { diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 5b626155754..65103fa8c49 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -114,6 +114,9 @@ public: const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; } ColumnPtr & getColumnPtr(size_t idx) { return columns[idx]; } + bool hasDynamicStructure() const override; + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + private: int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const; diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index 31e9b0964f4..ec47f5dfa74 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -12,7 +12,6 @@ #include #include #include -#include #include @@ -452,16 +451,18 @@ bool ColumnVariant::tryInsert(const DB::Field & x) return false; } -void ColumnVariant::insertFrom(const IColumn & src_, size_t n) +void ColumnVariant::insertFromImpl(const DB::IColumn & src_, size_t n, const std::vector * global_discriminators_mapping) { + const size_t num_variants = variants.size(); const ColumnVariant & src = assert_cast(src_); - const size_t num_variants = variants.size(); - if (src.variants.size() != num_variants) + if (!global_discriminators_mapping && src.variants.size() != num_variants) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert value of Variant type with different number of types"); - /// Remember that src column can have different local variants order. 
- Discriminator global_discr = src.globalDiscriminatorAt(n); + Discriminator src_global_discr = src.globalDiscriminatorAt(n); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; Discriminator local_discr = localDiscriminatorByGlobal(global_discr); getLocalDiscriminators().push_back(local_discr); if (local_discr == NULL_DISCRIMINATOR) @@ -471,25 +472,15 @@ void ColumnVariant::insertFrom(const IColumn & src_, size_t n) else { getOffsets().push_back(variants[local_discr]->size()); - variants[local_discr]->insertFrom(src.getVariantByGlobalDiscriminator(global_discr), src.offsetAt(n)); + variants[local_discr]->insertFrom(src.getVariantByGlobalDiscriminator(src_global_discr), src.offsetAt(n)); } } -void ColumnVariant::insertIntoVariant(const DB::Field & x, Discriminator global_discr) -{ - if (global_discr > variants.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid global discriminator: {}. The number of variants is {}", size_t(global_discr), variants.size()); - auto & variant = getVariantByGlobalDiscriminator(global_discr); - variant.insert(x); - getLocalDiscriminators().push_back(localDiscriminatorByGlobal(global_discr)); - getOffsets().push_back(variant.size() - 1); -} - -void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) +void ColumnVariant::insertRangeFromImpl(const DB::IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping) { const size_t num_variants = variants.size(); const auto & src = assert_cast(src_); - if (src.variants.size() != num_variants) + if (!global_discriminators_mapping && src.variants.size() != num_variants) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert value of Variant type with different number of types"); if (start + length > src.getLocalDiscriminators().size()) @@ -507,7 +498,12 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t l /// In this case we can simply call insertRangeFrom on this single variant. if (auto non_empty_src_local_discr = src.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) { - auto local_discr = localDiscriminatorByGlobal(src.globalDiscriminatorByLocal(*non_empty_src_local_discr)); + Discriminator src_global_discr = src.globalDiscriminatorByLocal(*non_empty_src_local_discr); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; + + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); size_t offset = variants[local_discr]->size(); variants[local_discr]->insertRangeFrom(*src.variants[*non_empty_src_local_discr], start, length); getLocalDiscriminators().resize_fill(local_discriminators->size() + length, local_discr); @@ -522,7 +518,7 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t l /// collect ranges we need to insert for all variants and update offsets. 
/// nested_ranges[i].first - offset in src.variants[i] /// nested_ranges[i].second - length in src.variants[i] - std::vector> nested_ranges(num_variants, {0, 0}); + std::vector> nested_ranges(src.variants.size(), {0, 0}); auto & offsets_data = getOffsets(); offsets_data.reserve(offsets_data.size() + length); auto & local_discriminators_data = getLocalDiscriminators(); @@ -533,7 +529,11 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t l { /// We insert from src.variants[src_local_discr] to variants[local_discr] Discriminator src_local_discr = src_local_discriminators_data[i]; - Discriminator local_discr = localDiscriminatorByGlobal(src.globalDiscriminatorByLocal(src_local_discr)); + Discriminator src_global_discr = src.globalDiscriminatorByLocal(src_local_discr); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); local_discriminators_data.push_back(local_discr); if (local_discr == NULL_DISCRIMINATOR) { @@ -553,22 +553,29 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t l for (size_t src_local_discr = 0; src_local_discr != nested_ranges.size(); ++src_local_discr) { auto [nested_start, nested_length] = nested_ranges[src_local_discr]; - auto local_discr = localDiscriminatorByGlobal(src.globalDiscriminatorByLocal(src_local_discr)); + Discriminator src_global_discr = src.globalDiscriminatorByLocal(src_local_discr); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); if (nested_length) variants[local_discr]->insertRangeFrom(*src.variants[src_local_discr], nested_start, nested_length); } } -void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position, size_t length, const std::vector * global_discriminators_mapping) { const size_t num_variants = variants.size(); const auto & src = assert_cast(src_); - if (src.variants.size() != num_variants) + if (!global_discriminators_mapping && src.variants.size() != num_variants) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert value of Variant type with different number of types"); - /// Remember that src column can have different local variants order. 
Discriminator src_local_discr = src.localDiscriminatorAt(position); - Discriminator local_discr = localDiscriminatorByGlobal(src.globalDiscriminatorByLocal(src_local_discr)); + Discriminator src_global_discr = src.globalDiscriminatorByLocal(src_local_discr); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); auto & local_discriminators_data = getLocalDiscriminators(); local_discriminators_data.resize_fill(local_discriminators_data.size() + length, local_discr); @@ -588,6 +595,72 @@ void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, si } } +void ColumnVariant::insertFrom(const IColumn & src_, size_t n) +{ + insertFromImpl(src_, n, nullptr); +} + +void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) +{ + insertRangeFromImpl(src_, start, length, nullptr); +} + +void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +{ + insertManyFromImpl(src_, position, length, nullptr); +} + +void ColumnVariant::insertFrom(const DB::IColumn & src_, size_t n, const std::vector & global_discriminators_mapping) +{ + insertFromImpl(src_, n, &global_discriminators_mapping); +} + +void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector & global_discriminators_mapping) +{ + insertRangeFromImpl(src_, start, length, &global_discriminators_mapping); +} + +void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length, const std::vector & global_discriminators_mapping) +{ + insertManyFromImpl(src_, position, length, &global_discriminators_mapping); +} + +void ColumnVariant::insertIntoVariantFrom(DB::ColumnVariant::Discriminator global_discr, const DB::IColumn & src_, size_t n) +{ + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); + getLocalDiscriminators().push_back(local_discr); + getOffsets().push_back(variants[local_discr]->size()); + variants[local_discr]->insertFrom(src_, n); +} + +void ColumnVariant::insertRangeIntoVariantFrom(DB::ColumnVariant::Discriminator global_discr, const DB::IColumn & src_, size_t start, size_t length) +{ + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); + auto & local_discriminators_data = getLocalDiscriminators(); + local_discriminators_data.resize_fill(local_discriminators_data.size() + length, local_discr); + auto & offsets_data = getOffsets(); + size_t offset = variants[local_discr]->size(); + offsets_data.reserve(offsets_data.size() + length); + for (size_t i = 0; i != length; ++i) + offsets_data.push_back(offset + i); + + variants[local_discr]->insertRangeFrom(src_, start, length); +} + +void ColumnVariant::insertManyIntoVariantFrom(DB::ColumnVariant::Discriminator global_discr, const DB::IColumn & src_, size_t position, size_t length) +{ + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); + auto & local_discriminators_data = getLocalDiscriminators(); + local_discriminators_data.resize_fill(local_discriminators_data.size() + length, local_discr); + auto & offsets_data = getOffsets(); + size_t offset = variants[local_discr]->size(); + offsets_data.reserve(offsets_data.size() + length); + for (size_t i = 0; i != length; ++i) + offsets_data.push_back(offset + i); + + variants[local_discr]->insertManyFrom(src_, position, length); +} + 
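All the *Impl insert paths above apply the optional mapping in the same way: translate the source's global discriminator (leaving NULL_DISCRIMINATOR untouched), convert it to a local discriminator, and push the usual discriminator/offset pair. A standalone sketch of that bookkeeping with plain vectors; `MiniVariant` and `insertMapped` are invented names and only the mapping logic is modeled, not the value copy:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

constexpr uint8_t NULL_DISCR = 255;

struct MiniVariant
{
    std::vector<uint8_t> local_discriminators;
    std::vector<size_t> offsets;
    std::vector<size_t> variant_sizes;           /// size of each variant, indexed by local discriminator
    std::vector<uint8_t> global_to_local;        /// global discriminator -> local discriminator

    /// Insert one value that in the source column had global discriminator src_global_discr.
    void insertMapped(uint8_t src_global_discr, const std::vector<uint8_t> * mapping)
    {
        uint8_t global_discr = src_global_discr;
        if (mapping && src_global_discr != NULL_DISCR)
            global_discr = (*mapping)[src_global_discr];   /// translate into our global numbering

        if (global_discr == NULL_DISCR)
        {
            local_discriminators.push_back(NULL_DISCR);
            offsets.push_back(0);                          /// offset is unused for NULL rows
            return;
        }

        uint8_t local_discr = global_to_local[global_discr];
        local_discriminators.push_back(local_discr);
        offsets.push_back(variant_sizes[local_discr]++);   /// value goes to the end of its variant
    }
};

int main()
{
    MiniVariant col;
    col.variant_sizes = {0, 0, 0};
    col.global_to_local = {2, 0, 1};                       /// arbitrary local order

    /// Source Variant has 2 variants; its globals 0 and 1 map to our globals 1 and 2.
    std::vector<uint8_t> mapping = {1, 2};
    col.insertMapped(0, &mapping);
    col.insertMapped(NULL_DISCR, &mapping);
    for (auto d : col.local_discriminators)
        std::cout << int(d) << ' ';                        /// 0 255
    std::cout << '\n';
}
```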
void ColumnVariant::insertDefault() { getLocalDiscriminators().push_back(NULL_DISCRIMINATOR); @@ -678,6 +751,14 @@ const char * ColumnVariant::deserializeAndInsertFromArena(const char * pos) return variants[local_discr]->deserializeAndInsertFromArena(pos); } +const char * ColumnVariant::deserializeVariantAndInsertFromArena(DB::ColumnVariant::Discriminator global_discr, const char * pos) +{ + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); + getLocalDiscriminators().push_back(local_discr); + getOffsets().push_back(variants[local_discr]->size()); + return variants[local_discr]->deserializeAndInsertFromArena(pos); +} + const char * ColumnVariant::skipSerializedInArena(const char * pos) const { Discriminator global_discr = unalignedLoad(pos); @@ -1426,4 +1507,54 @@ void ColumnVariant::applyNullMapImpl(const ColumnVector::Container & null } } +void ColumnVariant::extend(const std::vector & old_to_new_global_discriminators, std::vector> && new_variants_and_discriminators) +{ + /// Update global discriminators for current variants. + for (Discriminator & global_discr : local_to_global_discriminators) + global_discr = old_to_new_global_discriminators[global_discr]; + + /// Add new variants. + variants.reserve(variants.size() + new_variants_and_discriminators.size()); + local_to_global_discriminators.reserve(local_to_global_discriminators.size() + new_variants_and_discriminators.size()); + for (auto & new_variant_and_discriminator : new_variants_and_discriminators) + { + variants.emplace_back(std::move(new_variant_and_discriminator.first)); + local_to_global_discriminators.push_back(new_variant_and_discriminator.second); + } + + /// Update global -> local discriminators matching. + global_to_local_discriminators.resize(local_to_global_discriminators.size()); + for (Discriminator local_discr = 0; local_discr != local_to_global_discriminators.size(); ++local_discr) + global_to_local_discriminators[local_to_global_discriminators[local_discr]] = local_discr; +} + +bool ColumnVariant::hasDynamicStructure() const +{ + for (const auto & variant : variants) + { + if (variant->hasDynamicStructure()) + return true; + } + + return false; +} + +void ColumnVariant::takeDynamicStructureFromSourceColumns(const Columns & source_columns) +{ + std::vector variants_source_columns; + variants_source_columns.resize(variants.size()); + for (size_t i = 0; i != variants.size(); ++i) + variants_source_columns[i].reserve(source_columns.size()); + + for (const auto & source_column : source_columns) + { + const auto & source_variants = assert_cast(*source_column).variants; + for (size_t i = 0; i != source_variants.size(); ++i) + variants_source_columns[i].push_back(source_variants[i]); + } + + for (size_t i = 0; i != variants.size(); ++i) + variants[i]->takeDynamicStructureFromSourceColumns(variants_source_columns[i]); +} + } diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index 4aa2c9058cc..e5a4498f340 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -175,18 +175,32 @@ public: bool isDefaultAt(size_t n) const override; bool isNullAt(size_t n) const override; StringRef getDataAt(size_t n) const override; + void insertData(const char * pos, size_t length) override; void insert(const Field & x) override; bool tryInsert(const Field & x) override; - void insertIntoVariant(const Field & x, Discriminator global_discr); + void insertFrom(const IColumn & src_, size_t n) override; - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; 
- void insertManyFrom(const IColumn & src, size_t position, size_t length) override; + void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override; + void insertManyFrom(const IColumn & src_, size_t position, size_t length) override; + + /// Methods for insertion from another Variant but with known mapping between global discriminators. + void insertFrom(const IColumn & src_, size_t n, const std::vector & global_discriminators_mapping); + void insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector & global_discriminators_mapping); + void insertManyFrom(const IColumn & src_, size_t position, size_t length, const std::vector & global_discriminators_mapping); + + /// Methods for insertion into a specific variant. + void insertIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t n); + void insertRangeIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t start, size_t length); + void insertManyIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t position, size_t length); + void insertDefault() override; void insertManyDefaults(size_t length) override; + void popBack(size_t n) override; StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; const char * deserializeAndInsertFromArena(const char * pos) override; + const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos); const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; @@ -234,6 +248,8 @@ public: ColumnPtr & getVariantPtrByLocalDiscriminator(size_t discr) { return variants[discr]; } ColumnPtr & getVariantPtrByGlobalDiscriminator(size_t discr) { return variants[global_to_local_discriminators.at(discr)]; } + const NestedColumns & getVariants() const { return variants; } + const IColumn & getLocalDiscriminatorsColumn() const { return *local_discriminators; } IColumn & getLocalDiscriminatorsColumn() { return *local_discriminators; } @@ -282,7 +298,19 @@ public: void applyNullMap(const ColumnVector::Container & null_map); void applyNegatedNullMap(const ColumnVector::Container & null_map); + /// Extend current column with new variants. Change global discriminators of current variants to the new + /// according to the mapping and add new variants with new global discriminators. + /// This extension doesn't rewrite any data, just adds new empty variants and modifies global/local discriminators matching. 
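The extend() declared below (implemented above in ColumnVariant.cpp) only rewrites discriminator bookkeeping: existing variants get new global numbers through the old-to-new mapping, new empty variants are appended with their assigned globals, and the reverse global-to-local table is rebuilt. A compact standalone model of that remapping; the concrete names, numbers and vectors-of-strings are made up for the example:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main()
{
    /// Current state for Variant(Int8, String): globals are Int8 = 0, String = 1,
    /// while the local storage order happens to be the opposite.
    std::vector<std::string> variants = {"String", "Int8"};   /// indexed by local discriminator
    std::vector<uint8_t> local_to_global = {1, 0};

    /// After combining into Variant(Float64, Int8, String):
    /// old global 0 (Int8) becomes 1, old global 1 (String) becomes 2, Float64 is new with global 0.
    std::vector<uint8_t> old_to_new = {1, 2};
    std::vector<std::pair<std::string, uint8_t>> new_variants = {{"Float64", 0}};

    /// 1. Renumber globals of existing variants (no stored data is touched).
    for (auto & global : local_to_global)
        global = old_to_new[global];

    /// 2. Append new (empty) variants after the existing ones.
    for (const auto & [name, global] : new_variants)
    {
        variants.push_back(name);
        local_to_global.push_back(global);
    }

    /// 3. Rebuild the reverse mapping global -> local.
    std::vector<uint8_t> global_to_local(local_to_global.size());
    for (size_t local = 0; local != local_to_global.size(); ++local)
        global_to_local[local_to_global[local]] = uint8_t(local);

    for (size_t global = 0; global != global_to_local.size(); ++global)
        std::cout << "global " << global << " -> local " << int(global_to_local[global]) << '\n';
}
```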
+ void extend(const std::vector & old_to_new_global_discriminators, std::vector> && new_variants_and_discriminators); + + bool hasDynamicStructure() const override; + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + private: + void insertFromImpl(const IColumn & src_, size_t n, const std::vector * global_discriminators_mapping); + void insertRangeFromImpl(const IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping); + void insertManyFromImpl(const IColumn & src_, size_t position, size_t length, const std::vector * global_discriminators_mapping); + void initIdentityGlobalToLocalDiscriminatorsMapping(); template diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 39ee1d931bd..91bceaa4534 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -441,6 +441,9 @@ ColumnPtr ColumnVector::indexImpl(const PaddedPODArray & indexes, size_ return res; } +template +concept is_col_vector = std::is_same_v>; + /// Prevent implicit template instantiation of ColumnVector for common types extern template class ColumnVector; diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 18974e49760..479fd7de1bc 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -461,6 +462,7 @@ template class IColumnHelper; template class IColumnHelper; template class IColumnHelper; template class IColumnHelper; +template class IColumnHelper; template class IColumnHelper; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index cf2693e008c..b49d6f2a66d 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -534,6 +534,11 @@ public: return res; } + /// Checks if column has dynamic subcolumns. + virtual bool hasDynamicStructure() const { return false; } + /// For columns with dynamic subcolumns this method takes dynamic structure from source columns + /// and creates proper resulting dynamic structure in advance for merge of these source columns. + virtual void takeDynamicStructureFromSourceColumns(const std::vector & /*source_columns*/) {} /** Some columns can contain another columns inside. * So, we have a tree of columns. But not all combinations are possible. 
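The is_col_vector concept added to ColumnVector.h above (its template parameters are not fully visible in this extract) follows the usual "is this type a specialization of the template" pattern: rebuild the specialization from the nested value type and compare it with the type itself. A standalone C++20 sketch of that pattern with an invented `MiniColumnVector`, not the real declaration:

```cpp
#include <type_traits>
#include <vector>

/// Stand-in for ColumnVector<T>.
template <typename T>
struct MiniColumnVector { using ValueType = T; };

/// True when T is MiniColumnVector<X> for some X; a failed ValueType lookup
/// simply makes the constraint unsatisfied rather than a hard error.
template <typename T>
concept is_mini_col_vector = std::is_same_v<T, MiniColumnVector<typename T::ValueType>>;

static_assert(is_mini_col_vector<MiniColumnVector<int>>);
static_assert(!is_mini_col_vector<std::vector<int>>);

int main() {}
```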
diff --git a/src/Columns/tests/gtest_column_dynamic.cpp b/src/Columns/tests/gtest_column_dynamic.cpp new file mode 100644 index 00000000000..a2862b09de1 --- /dev/null +++ b/src/Columns/tests/gtest_column_dynamic.cpp @@ -0,0 +1,652 @@ +#include +#include +#include +#include + +using namespace DB; + +TEST(ColumnDynamic, CreateEmpty) +{ + auto column = ColumnDynamic::create(255); + ASSERT_TRUE(column->empty()); + ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant()"); + ASSERT_TRUE(column->getVariantInfo().variant_names.empty()); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.empty()); +} + +TEST(ColumnDynamic, InsertDefault) +{ + auto column = ColumnDynamic::create(255); + column->insertDefault(); + ASSERT_TRUE(column->size() == 1); + ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant()"); + ASSERT_TRUE(column->getVariantInfo().variant_names.empty()); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.empty()); + ASSERT_TRUE(column->isNullAt(0)); + ASSERT_EQ((*column)[0], Field(Null())); +} + +TEST(ColumnDynamic, InsertFields) +{ + auto column = ColumnDynamic::create(255); + column->insert(Field(42)); + column->insert(Field(-42)); + column->insert(Field("str1")); + column->insert(Field(Null())); + column->insert(Field(42.42)); + column->insert(Field(43)); + column->insert(Field(-43)); + column->insert(Field("str2")); + column->insert(Field(Null())); + column->insert(Field(43.43)); + ASSERT_TRUE(column->size() == 10); + + ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, String)"); + std::vector expected_names = {"Float64", "Int8", "String"}; + ASSERT_EQ(column->getVariantInfo().variant_names, expected_names); + std::unordered_map expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"String", 2}}; + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + +ColumnDynamic::MutablePtr getDynamicWithManyVariants(size_t num_variants, Field tuple_element = Field(42)) +{ + auto column = ColumnDynamic::create(255); + for (size_t i = 0; i != num_variants; ++i) + { + Tuple tuple; + for (size_t j = 0; j != i + 1; ++j) + tuple.push_back(tuple_element); + column->insert(tuple); + } + + return column; +} + +TEST(ColumnDynamic, InsertFieldsOverflow1) +{ + auto column = getDynamicWithManyVariants(253); + + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 253); + + column->insert(Field(42.42)); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + + column->insert(Field(42)); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + Field field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "42"); + + column->insert(Field(43)); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "43"); + + column->insert(Field("str1")); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + 
ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "str1"); + + column->insert(Field(Array({Field(42), Field(43)}))); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "[42, 43]"); +} + +TEST(ColumnDynamic, InsertFieldsOverflow2) +{ + auto column = getDynamicWithManyVariants(254); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254); + + column->insert(Field("str1")); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + + column->insert(Field(42)); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + Field field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "42"); +} + +ColumnDynamic::MutablePtr getInsertFromColumn(size_t num = 1) +{ + auto column_from = ColumnDynamic::create(255); + for (size_t i = 0; i != num; ++i) + { + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + } + return column_from; +} + +void checkInsertFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector & expected_names, const std::unordered_map & expected_variant_name_to_discriminator) +{ + column_to->insertFrom(*column_from, 0); + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); + auto field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertFrom(*column_from, 1); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42.42); + + column_to->insertFrom(*column_from, 2); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + +TEST(ColumnDynamic, InsertFrom1) +{ + auto column_to = ColumnDynamic::create(255); + checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertFrom2) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str")); + + checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertFrom3) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str")); + column_to->insert(Array({Field(42)})); + + 
checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}}); +} + +TEST(ColumnDynamic, InsertFromOverflow1) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertFrom(*column_from, 0); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + auto field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertFrom(*column_from, 1); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "42.42"); + + column_to->insertFrom(*column_from, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); +} + +TEST(ColumnDynamic, InsertFromOverflow2) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertFrom(*column_from, 0); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + auto field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertFrom(*column_from, 1); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "42.42"); +} + +void checkInsertManyFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector & expected_names, const std::unordered_map & expected_variant_name_to_discriminator) +{ + column_to->insertManyFrom(*column_from, 0, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); + auto field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertManyFrom(*column_from, 1, 2); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42.42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42.42); + + column_to->insertManyFrom(*column_from, 2, 2); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, "str"); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + 
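The ColumnDynamic overflow tests above and below all exercise the same policy: while the internal Variant still has room, a value of a new type adds a new variant; once the limit is reached, the value is kept but stored through the String variant, which is allowed to take the last free slot. A small self-contained approximation of that rule in plain C++ (an illustrative assumption inferred from these tests, not the actual implementation):

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    // Toy model of a Dynamic column: a cap on distinct variant type names; one
    // slot is effectively kept in reserve for the String fallback.
    struct ToyDynamic
    {
        size_t max_variants;
        std::map<std::string, size_t> variant_to_discriminator;
        std::vector<std::pair<std::string, std::string>> rows; // (variant name, textual value)

        void insert(const std::string & type_name, const std::string & value_as_text)
        {
            bool known = variant_to_discriminator.count(type_name) > 0;
            bool fits = variant_to_discriminator.size() + 1 < max_variants; // keep room for String
            if (known || fits || type_name == "String")
            {
                variant_to_discriminator.emplace(type_name, variant_to_discriminator.size());
                rows.emplace_back(type_name, value_as_text);
                return;
            }
            // Overflow: the value survives, but only as its String representation.
            variant_to_discriminator.emplace("String", variant_to_discriminator.size());
            rows.emplace_back("String", value_as_text);
        }
    };

    int main()
    {
        ToyDynamic d{.max_variants = 3};
        d.insert("Int8", "42");            // new variant Int8
        d.insert("Float64", "42.42");      // new variant Float64 (last "regular" slot)
        d.insert("Array(Int8)", "[1, 2]"); // limit reached -> stored via String
        d.insert("Int8", "43");            // Int8 already exists -> stored as Int8

        for (const auto & [variant, value] : d.rows)
            std::cout << variant << ": " << value << '\n';
    }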
+TEST(ColumnDynamic, InsertManyFrom1) +{ + auto column_to = ColumnDynamic::create(255); + checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertManyFrom2) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str")); + + checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertManyFrom3) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str")); + column_to->insert(Array({Field(42)})); + + checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}}); +} + +TEST(ColumnDynamic, InsertManyFromOverflow1) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertManyFrom(*column_from, 0, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + auto field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertManyFrom(*column_from, 1, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, "42.42"); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "42.42"); + + column_to->insertManyFrom(*column_from, 2, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, "str"); +} + +TEST(ColumnDynamic, InsertManyFromOverflow2) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertManyFrom(*column_from, 0, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + auto field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertManyFrom(*column_from, 1, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, "42.42"); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 
"42.42"); +} + +void checkInsertRangeFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector & expected_names, const std::unordered_map & expected_variant_name_to_discriminator) +{ + column_to->insertRangeFrom(*column_from, 0, 3); + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); + auto field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42.42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + + column_to->insertRangeFrom(*column_from, 3, 3); + field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42.42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + +TEST(ColumnDynamic, InsertRangeFrom1) +{ + auto column_to = ColumnDynamic::create(255); + checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertRangeFrom2) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str1")); + + checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertRangeFrom3) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str1")); + column_to->insert(Array({Field(42)})); + + checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}}); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow1) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertRangeFrom(*column_from, 0, 4); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 4]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("42.42")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("str")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow2) +{ + auto column_from = 
ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(42.42)); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertRangeFrom(*column_from, 0, 3); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("42.42")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow3) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(42.42)); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insert(Field("Str")); + column_to->insertRangeFrom(*column_from, 0, 3); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("42.42")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow4) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(254); + column_to->insertRangeFrom(*column_from, 0, 3); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field("42")); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("42.42")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("str")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow5) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insert(Field("str")); + column_to->insertRangeFrom(*column_from, 0, 4); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 4]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 3]; + 
ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("42.42")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("str")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow6) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(44)); + column_from->insert(Field(42.42)); + column_from->insert(Field(43.43)); + column_from->insert(Field("str")); + column_from->insert(Field(Array({Field(42)}))); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertRangeFrom(*column_from, 2, 5); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); + auto field = (*column_to)[column_to->size() - 5]; + + ASSERT_EQ(field, Field("44")); + field = (*column_to)[column_to->size() - 4]; + ASSERT_EQ(field, Field(42.42)); + field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(43.43)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("str")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("[42]")); +} + +TEST(ColumnDynamic, SerializeDeserializeFromArena1) +{ + auto column = ColumnDynamic::create(255); + column->insert(Field(42)); + column->insert(Field(42.42)); + column->insert(Field("str")); + column->insert(Field(Null())); + + Arena arena; + const char * pos = nullptr; + auto ref1 = column->serializeValueIntoArena(0, arena, pos); + column->serializeValueIntoArena(1, arena, pos); + column->serializeValueIntoArena(2, arena, pos); + column->serializeValueIntoArena(3, arena, pos); + pos = column->deserializeAndInsertFromArena(ref1.data); + pos = column->deserializeAndInsertFromArena(pos); + pos = column->deserializeAndInsertFromArena(pos); + column->deserializeAndInsertFromArena(pos); + + ASSERT_EQ((*column)[column->size() - 4], 42); + ASSERT_EQ((*column)[column->size() - 3], 42.42); + ASSERT_EQ((*column)[column->size() - 2], "str"); + ASSERT_EQ((*column)[column->size() - 1], Null()); +} + +TEST(ColumnDynamic, SerializeDeserializeFromArena2) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + column_from->insert(Field(Null())); + + Arena arena; + const char * pos = nullptr; + auto ref1 = column_from->serializeValueIntoArena(0, arena, pos); + column_from->serializeValueIntoArena(1, arena, pos); + column_from->serializeValueIntoArena(2, arena, pos); + column_from->serializeValueIntoArena(3, arena, pos); + + auto column_to = ColumnDynamic::create(255); + pos = column_to->deserializeAndInsertFromArena(ref1.data); + pos = column_to->deserializeAndInsertFromArena(pos); + pos = column_to->deserializeAndInsertFromArena(pos); + column_to->deserializeAndInsertFromArena(pos); + + ASSERT_EQ((*column_from)[column_from->size() - 4], 42); + ASSERT_EQ((*column_from)[column_from->size() - 3], 42.42); + ASSERT_EQ((*column_from)[column_from->size() - 2], "str"); + ASSERT_EQ((*column_from)[column_from->size() - 1], Null()); + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), "Variant(Float64, 
Int8, String)"); + std::vector expected_names = {"Float64", "Int8", "String"}; + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + std::unordered_map expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"String", 2}}; + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + +TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + column_from->insert(Field(Null())); + + Arena arena; + const char * pos = nullptr; + auto ref1 = column_from->serializeValueIntoArena(0, arena, pos); + column_from->serializeValueIntoArena(1, arena, pos); + column_from->serializeValueIntoArena(2, arena, pos); + column_from->serializeValueIntoArena(3, arena, pos); + + auto column_to = getDynamicWithManyVariants(253); + pos = column_to->deserializeAndInsertFromArena(ref1.data); + pos = column_to->deserializeAndInsertFromArena(pos); + pos = column_to->deserializeAndInsertFromArena(pos); + column_to->deserializeAndInsertFromArena(pos); + + ASSERT_EQ((*column_from)[column_from->size() - 4], 42); + ASSERT_EQ((*column_from)[column_from->size() - 3], 42.42); + ASSERT_EQ((*column_from)[column_from->size() - 2], "str"); + ASSERT_EQ((*column_from)[column_from->size() - 1], Null()); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); +} + +TEST(ColumnDynamic, skipSerializedInArena) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + column_from->insert(Field(Null())); + + Arena arena; + const char * pos = nullptr; + auto ref1 = column_from->serializeValueIntoArena(0, arena, pos); + column_from->serializeValueIntoArena(1, arena, pos); + column_from->serializeValueIntoArena(2, arena, pos); + auto ref4 = column_from->serializeValueIntoArena(3, arena, pos); + + const char * end = ref4.data + ref4.size; + auto column_to = ColumnDynamic::create(255); + pos = column_to->skipSerializedInArena(ref1.data); + pos = column_to->skipSerializedInArena(pos); + pos = column_to->skipSerializedInArena(pos); + pos = column_to->skipSerializedInArena(pos); + + ASSERT_EQ(pos, end); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.empty()); + ASSERT_TRUE(column_to->getVariantInfo().variant_names.empty()); +} diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 9607333b9f7..cfb273b9058 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes extern const int ASYNC_LOAD_CYCLE; extern const int ASYNC_LOAD_FAILED; extern const int ASYNC_LOAD_CANCELED; + extern const int ASYNC_LOAD_WAIT_FAILED; extern const int LOGICAL_ERROR; } @@ -433,7 +434,7 @@ void AsyncLoader::wait(const LoadJobPtr & job, bool no_throw) std::unique_lock job_lock{job->mutex}; wait(job_lock, job); if (!no_throw && job->load_exception) - std::rethrow_exception(job->load_exception); + throw Exception(ErrorCodes::ASYNC_LOAD_WAIT_FAILED, "Waited job failed: {}", getExceptionMessage(job->load_exception, /* with_stacktrace = */ false)); } void AsyncLoader::remove(const LoadJobSet & jobs) diff 
--git a/src/Common/CPUID.h b/src/Common/CPUID.h index d7a714ec5af..b49f7706904 100644 --- a/src/Common/CPUID.h +++ b/src/Common/CPUID.h @@ -69,9 +69,9 @@ union CPUInfo UInt32 edx; } registers; - inline explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); } + explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); } - inline CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); } + CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); } }; inline bool haveRDTSCP() noexcept diff --git a/src/Common/ColumnsHashingImpl.h b/src/Common/ColumnsHashingImpl.h index f74a56292ae..0e013decf1f 100644 --- a/src/Common/ColumnsHashingImpl.h +++ b/src/Common/ColumnsHashingImpl.h @@ -453,7 +453,7 @@ protected: /// Return the columns which actually contain the values of the keys. /// For a given key column, if it is nullable, we return its nested /// column. Otherwise we return the key column itself. - inline const ColumnRawPtrs & getActualColumns() const + const ColumnRawPtrs & getActualColumns() const { return actual_columns; } diff --git a/src/Common/CombinedCardinalityEstimator.h b/src/Common/CombinedCardinalityEstimator.h index 0e53755d773..132f00de8eb 100644 --- a/src/Common/CombinedCardinalityEstimator.h +++ b/src/Common/CombinedCardinalityEstimator.h @@ -292,13 +292,13 @@ private: } template - inline T & getContainer() + T & getContainer() { return *reinterpret_cast(address & mask); } template - inline const T & getContainer() const + const T & getContainer() const { return *reinterpret_cast(address & mask); } @@ -309,7 +309,7 @@ private: address |= static_cast(t); } - inline details::ContainerType getContainerType() const + details::ContainerType getContainerType() const { return static_cast(address & ~mask); } diff --git a/src/Common/CompactArray.h b/src/Common/CompactArray.h index 613dc3d0b90..7b2bd658d2e 100644 --- a/src/Common/CompactArray.h +++ b/src/Common/CompactArray.h @@ -116,7 +116,7 @@ public: /** Return the current cell number and the corresponding content. 
*/ - inline std::pair get() const + std::pair get() const { if ((current_bucket_index == 0) || is_eof) throw Exception(ErrorCodes::NO_AVAILABLE_DATA, "No available data."); diff --git a/src/Common/CounterInFile.h b/src/Common/CounterInFile.h index 854bf7cc675..0a11e52be2c 100644 --- a/src/Common/CounterInFile.h +++ b/src/Common/CounterInFile.h @@ -37,7 +37,7 @@ namespace fs = std::filesystem; class CounterInFile { private: - static inline constexpr size_t SMALL_READ_WRITE_BUFFER_SIZE = 16; + static constexpr size_t SMALL_READ_WRITE_BUFFER_SIZE = 16; public: /// path - the name of the file, including the path diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 21b4d114d79..e73ac307a35 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -168,6 +168,9 @@ M(ObjectStorageS3Threads, "Number of threads in the S3ObjectStorage thread pool.") \ M(ObjectStorageS3ThreadsActive, "Number of threads in the S3ObjectStorage thread pool running a task.") \ M(ObjectStorageS3ThreadsScheduled, "Number of queued or active jobs in the S3ObjectStorage thread pool.") \ + M(StorageObjectStorageThreads, "Number of threads in the remote table engines thread pools.") \ + M(StorageObjectStorageThreadsActive, "Number of threads in the remote table engines thread pool running a task.") \ + M(StorageObjectStorageThreadsScheduled, "Number of queued or active jobs in remote table engines thread pool.") \ M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \ M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \ M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \ @@ -224,6 +227,8 @@ M(PartsActive, "Active data part, used by current and upcoming SELECTs.") \ M(AttachedDatabase, "Active database, used by current and upcoming SELECTs.") \ M(AttachedTable, "Active table, used by current and upcoming SELECTs.") \ + M(AttachedView, "Active view, used by current and upcoming SELECTs.") \ + M(AttachedDictionary, "Active dictionary, used by current and upcoming SELECTs.") \ M(PartsOutdated, "Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.") \ M(PartsDeleting, "Not active data part with identity refcounter, it is deleting right now by a cleaner.") \ M(PartsDeleteOnDestroy, "Part was moved to another disk and should be deleted in own destructor.") \ diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h index e2b627a7f29..53b61ba315f 100644 --- a/src/Common/CurrentThread.h +++ b/src/Common/CurrentThread.h @@ -64,7 +64,7 @@ public: static ProfileEvents::Counters & getProfileEvents(); inline ALWAYS_INLINE static MemoryTracker * getMemoryTracker() { - if (unlikely(!current_thread)) + if (!current_thread) [[unlikely]] return nullptr; return ¤t_thread->memory_tracker; } diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 44c051401ef..ea6f9510927 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -600,6 +600,8 @@ M(719, QUERY_CACHE_USED_WITH_SYSTEM_TABLE) \ M(720, USER_EXPIRED) \ M(721, DEPRECATED_FUNCTION) \ + M(722, ASYNC_LOAD_WAIT_FAILED) \ + M(723, PARQUET_EXCEPTION) \ \ M(900, DISTRIBUTED_CACHE_ERROR) \ M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ diff --git a/src/Common/HashTable/FixedHashTable.h b/src/Common/HashTable/FixedHashTable.h index 49675aaafbc..9666706ba20 100644 --- a/src/Common/HashTable/FixedHashTable.h 
+++ b/src/Common/HashTable/FixedHashTable.h @@ -115,6 +115,12 @@ class FixedHashTable : private boost::noncopyable, protected Allocator, protecte { static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8); + /// We maintain min and max values inserted into the hash table to then limit the amount of cells to traverse to the [min; max] range. + /// Both values could be efficiently calculated only within `emplace` calls (and not when we populate the hash table in `read` method for example), so we update them only within `emplace` and track if any other method was called. + bool only_emplace_was_used_to_insert_data = true; + size_t min = NUM_CELLS - 1; + size_t max = 0; + protected: friend class const_iterator; friend class iterator; @@ -170,6 +176,8 @@ protected: /// Skip empty cells in the main buffer. const auto * buf_end = container->buf + container->NUM_CELLS; + if (container->canUseMinMaxOptimization()) + buf_end = container->buf + container->max + 1; while (ptr < buf_end && ptr->isZero(*container)) ++ptr; @@ -261,7 +269,7 @@ public: return true; } - inline const value_type & get() const + const value_type & get() const { if (!is_initialized || is_eof) throw DB::Exception(DB::ErrorCodes::NO_AVAILABLE_DATA, "No available data"); @@ -297,12 +305,7 @@ public: if (!buf) return end(); - const Cell * ptr = buf; - auto buf_end = buf + NUM_CELLS; - while (ptr < buf_end && ptr->isZero(*this)) - ++ptr; - - return const_iterator(this, ptr); + return const_iterator(this, firstPopulatedCell()); } const_iterator cbegin() const { return begin(); } @@ -312,18 +315,13 @@ public: if (!buf) return end(); - Cell * ptr = buf; - auto buf_end = buf + NUM_CELLS; - while (ptr < buf_end && ptr->isZero(*this)) - ++ptr; - - return iterator(this, ptr); + return iterator(this, const_cast(firstPopulatedCell())); } const_iterator end() const { /// Avoid UBSan warning about adding zero to nullptr. It is valid in C++20 (and earlier) but not valid in C. - return const_iterator(this, buf ? buf + NUM_CELLS : buf); + return const_iterator(this, buf ? lastPopulatedCell() : buf); } const_iterator cend() const @@ -333,7 +331,7 @@ public: iterator end() { - return iterator(this, buf ? buf + NUM_CELLS : buf); + return iterator(this, buf ? lastPopulatedCell() : buf); } @@ -350,6 +348,8 @@ public: new (&buf[x]) Cell(x, *this); inserted = true; + if (x < min) min = x; + if (x > max) max = x; this->increaseSize(); } @@ -377,6 +377,26 @@ public: bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].isZero(*this); } bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].isZero(*this); } + /// Decide if we use the min/max optimization. `max < min` means the FixedHashtable is empty. The flag `only_emplace_was_used_to_insert_data` + /// will check if the FixedHashTable will only use `emplace()` to insert the raw data. + bool ALWAYS_INLINE canUseMinMaxOptimization() const { return ((max >= min) && only_emplace_was_used_to_insert_data); } + + const Cell * ALWAYS_INLINE firstPopulatedCell() const + { + const Cell * ptr = buf; + if (!canUseMinMaxOptimization()) + { + while (ptr < buf + NUM_CELLS && ptr->isZero(*this)) + ++ptr; + } + else + ptr = buf + min; + + return ptr; + } + + Cell * ALWAYS_INLINE lastPopulatedCell() const { return canUseMinMaxOptimization() ? 
buf + max + 1 : buf + NUM_CELLS; } + void write(DB::WriteBuffer & wb) const { Cell::State::write(wb); @@ -433,6 +453,7 @@ public: x.read(rb); new (&buf[place_value]) Cell(x, *this); } + only_emplace_was_used_to_insert_data = false; } void readText(DB::ReadBuffer & rb) @@ -455,6 +476,7 @@ public: x.readText(rb); new (&buf[place_value]) Cell(x, *this); } + only_emplace_was_used_to_insert_data = false; } size_t size() const { return this->getSize(buf, *this, NUM_CELLS); } @@ -493,7 +515,11 @@ public: } const Cell * data() const { return buf; } - Cell * data() { return buf; } + Cell * data() + { + only_emplace_was_used_to_insert_data = false; + return buf; + } #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS size_t getCollisions() const { return 0; } diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h index 9050b7ef6d7..a600f57b06a 100644 --- a/src/Common/HashTable/HashTable.h +++ b/src/Common/HashTable/HashTable.h @@ -844,7 +844,7 @@ public: return true; } - inline const value_type & get() const + const value_type & get() const { if (!is_initialized || is_eof) throw DB::Exception(DB::ErrorCodes::NO_AVAILABLE_DATA, "No available data"); diff --git a/src/Common/HashTable/PackedHashMap.h b/src/Common/HashTable/PackedHashMap.h index 0d25addb58e..72eb721b274 100644 --- a/src/Common/HashTable/PackedHashMap.h +++ b/src/Common/HashTable/PackedHashMap.h @@ -69,7 +69,7 @@ struct PackedHashMapCell : public HashMapCellvalue.first, state); } static bool isZero(const Key key, const State & /*state*/) { return ZeroTraits::check(key); } - static inline bool bitEqualsByValue(key_type a, key_type b) { return a == b; } + static bool bitEqualsByValue(key_type a, key_type b) { return a == b; } template auto get() const diff --git a/src/Common/HashTable/SmallTable.h b/src/Common/HashTable/SmallTable.h index 3229e4748ea..63a6b932dd0 100644 --- a/src/Common/HashTable/SmallTable.h +++ b/src/Common/HashTable/SmallTable.h @@ -112,7 +112,7 @@ public: return true; } - inline const value_type & get() const + const value_type & get() const { if (!is_initialized || is_eof) throw DB::Exception(DB::ErrorCodes::NO_AVAILABLE_DATA, "No available data"); diff --git a/src/Common/HyperLogLogCounter.h b/src/Common/HyperLogLogCounter.h index bacd4cc7288..9b2b33dc918 100644 --- a/src/Common/HyperLogLogCounter.h +++ b/src/Common/HyperLogLogCounter.h @@ -128,13 +128,13 @@ public: { } - inline void update(UInt8 cur_rank, UInt8 new_rank) + void update(UInt8 cur_rank, UInt8 new_rank) { denominator -= static_cast(1.0) / (1ULL << cur_rank); denominator += static_cast(1.0) / (1ULL << new_rank); } - inline void update(UInt8 rank) + void update(UInt8 rank) { denominator += static_cast(1.0) / (1ULL << rank); } @@ -166,13 +166,13 @@ public: rank_count[0] = static_cast(initial_value); } - inline void update(UInt8 cur_rank, UInt8 new_rank) + void update(UInt8 cur_rank, UInt8 new_rank) { --rank_count[cur_rank]; ++rank_count[new_rank]; } - inline void update(UInt8 rank) + void update(UInt8 rank) { ++rank_count[rank]; } @@ -429,13 +429,13 @@ public: private: /// Extract subset of bits in [begin, end[ range. - inline HashValueType extractBitSequence(HashValueType val, UInt8 begin, UInt8 end) const + HashValueType extractBitSequence(HashValueType val, UInt8 begin, UInt8 end) const { return (val >> begin) & ((1ULL << (end - begin)) - 1); } /// Rank is number of trailing zeros. 
- inline UInt8 calculateRank(HashValueType val) const + UInt8 calculateRank(HashValueType val) const { if (unlikely(val == 0)) return max_rank; @@ -448,7 +448,7 @@ private: return zeros_plus_one; } - inline HashValueType getHash(Value key) const + HashValueType getHash(Value key) const { /// NOTE: this should be OK, since value is the same as key for HLL. return static_cast( @@ -496,7 +496,7 @@ private: throw Poco::Exception("Internal error", DB::ErrorCodes::LOGICAL_ERROR); } - inline double applyCorrection(double raw_estimate) const + double applyCorrection(double raw_estimate) const { double fixed_estimate; @@ -525,7 +525,7 @@ private: /// Correction used in HyperLogLog++ algorithm. /// Source: "HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm" /// (S. Heule et al., Proceedings of the EDBT 2013 Conference). - inline double applyBiasCorrection(double raw_estimate) const + double applyBiasCorrection(double raw_estimate) const { double fixed_estimate; @@ -540,7 +540,7 @@ private: /// Calculation of unique values using LinearCounting algorithm. /// Source: "A Linear-time Probabilistic Counting Algorithm for Database Applications" /// (Whang et al., ACM Trans. Database Syst., pp. 208-229, 1990). - inline double applyLinearCorrection(double raw_estimate) const + double applyLinearCorrection(double raw_estimate) const { double fixed_estimate; diff --git a/src/Common/IntervalTree.h b/src/Common/IntervalTree.h index fbd1de3197e..db7f5238921 100644 --- a/src/Common/IntervalTree.h +++ b/src/Common/IntervalTree.h @@ -23,7 +23,7 @@ struct Interval Interval(IntervalStorageType left_, IntervalStorageType right_) : left(left_), right(right_) { } - inline bool contains(IntervalStorageType point) const { return left <= point && point <= right; } + bool contains(IntervalStorageType point) const { return left <= point && point <= right; } }; template @@ -290,7 +290,7 @@ private: IntervalStorageType middle_element; - inline bool hasValue() const { return sorted_intervals_range_size != 0; } + bool hasValue() const { return sorted_intervals_range_size != 0; } }; using IntervalWithEmptyValue = Interval; @@ -585,7 +585,7 @@ private: } } - inline size_t findFirstIteratorNodeIndex() const + size_t findFirstIteratorNodeIndex() const { size_t nodes_size = nodes.size(); size_t result_index = 0; @@ -602,7 +602,7 @@ private: return result_index; } - inline size_t findLastIteratorNodeIndex() const + size_t findLastIteratorNodeIndex() const { if (unlikely(nodes.empty())) return 0; @@ -618,7 +618,7 @@ private: return result_index; } - inline void increaseIntervalsSize() + void increaseIntervalsSize() { /// Before tree is build we store all intervals size in our first node to allow tree iteration. 
++intervals_size; @@ -630,7 +630,7 @@ private: size_t intervals_size = 0; bool tree_is_built = false; - static inline const Interval & getInterval(const IntervalWithValue & interval_with_value) + static const Interval & getInterval(const IntervalWithValue & interval_with_value) { if constexpr (is_empty_value) return interval_with_value; @@ -639,7 +639,7 @@ private: } template - static inline bool callCallback(const IntervalWithValue & interval, IntervalCallback && callback) + static bool callCallback(const IntervalWithValue & interval, IntervalCallback && callback) { if constexpr (is_empty_value) return callback(interval); @@ -647,7 +647,7 @@ private: return callback(interval.first, interval.second); } - static inline void + static void intervalsToPoints(const std::vector & intervals, std::vector & temporary_points_storage) { for (const auto & interval_with_value : intervals) @@ -658,7 +658,7 @@ private: } } - static inline IntervalStorageType pointsMedian(std::vector & points) + static IntervalStorageType pointsMedian(std::vector & points) { size_t size = points.size(); size_t middle_element_index = size / 2; diff --git a/src/Common/JSONParsers/SimdJSONParser.h b/src/Common/JSONParsers/SimdJSONParser.h index a8594710d20..827d142266a 100644 --- a/src/Common/JSONParsers/SimdJSONParser.h +++ b/src/Common/JSONParsers/SimdJSONParser.h @@ -26,62 +26,62 @@ class SimdJSONBasicFormatter { public: explicit SimdJSONBasicFormatter(PaddedPODArray & buffer_) : buffer(buffer_) {} - inline void comma() { oneChar(','); } + void comma() { oneChar(','); } /** Start an array, prints [ **/ - inline void startArray() { oneChar('['); } + void startArray() { oneChar('['); } /** End an array, prints ] **/ - inline void endArray() { oneChar(']'); } + void endArray() { oneChar(']'); } /** Start an array, prints { **/ - inline void startObject() { oneChar('{'); } + void startObject() { oneChar('{'); } /** Start an array, prints } **/ - inline void endObject() { oneChar('}'); } + void endObject() { oneChar('}'); } /** Prints a true **/ - inline void trueAtom() + void trueAtom() { const char * s = "true"; buffer.insert(s, s + 4); } /** Prints a false **/ - inline void falseAtom() + void falseAtom() { const char * s = "false"; buffer.insert(s, s + 5); } /** Prints a null **/ - inline void nullAtom() + void nullAtom() { const char * s = "null"; buffer.insert(s, s + 4); } /** Prints a number **/ - inline void number(int64_t x) + void number(int64_t x) { char number_buffer[24]; auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x); buffer.insert(number_buffer, res.ptr); } /** Prints a number **/ - inline void number(uint64_t x) + void number(uint64_t x) { char number_buffer[24]; auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x); buffer.insert(number_buffer, res.ptr); } /** Prints a number **/ - inline void number(double x) + void number(double x) { char number_buffer[24]; auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x); buffer.insert(number_buffer, res.ptr); } /** Prints a key (string + colon) **/ - inline void key(std::string_view unescaped) + void key(std::string_view unescaped) { string(unescaped); oneChar(':'); } /** Prints a string. The string is escaped as needed. 
**/ - inline void string(std::string_view unescaped) + void string(std::string_view unescaped) { oneChar('\"'); size_t i = 0; @@ -165,7 +165,7 @@ public: oneChar('\"'); } - inline void oneChar(char c) + void oneChar(char c) { buffer.push_back(c); } @@ -182,7 +182,7 @@ class SimdJSONElementFormatter public: explicit SimdJSONElementFormatter(PaddedPODArray & buffer_) : format(buffer_) {} /** Append an element to the builder (to be printed) **/ - inline void append(simdjson::dom::element value) + void append(simdjson::dom::element value) { switch (value.type()) { @@ -224,7 +224,7 @@ public: } } /** Append an array to the builder (to be printed) **/ - inline void append(simdjson::dom::array value) + void append(simdjson::dom::array value) { format.startArray(); auto iter = value.begin(); @@ -241,7 +241,7 @@ public: format.endArray(); } - inline void append(simdjson::dom::object value) + void append(simdjson::dom::object value) { format.startObject(); auto pair = value.begin(); @@ -258,7 +258,7 @@ public: format.endObject(); } - inline void append(simdjson::dom::key_value_pair kv) + void append(simdjson::dom::key_value_pair kv) { format.key(kv.key); append(kv.value); diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index b4069027ad1..ece5114a998 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -284,7 +284,7 @@ public: } template - inline void assertNotIntersects(It1 from_begin [[maybe_unused]], It2 from_end [[maybe_unused]]) + void assertNotIntersects(It1 from_begin [[maybe_unused]], It2 from_end [[maybe_unused]]) { #if !defined(NDEBUG) const char * ptr_begin = reinterpret_cast(&*from_begin); diff --git a/src/Common/PoolBase.h b/src/Common/PoolBase.h index d6fc1656eca..fb0c75e7c95 100644 --- a/src/Common/PoolBase.h +++ b/src/Common/PoolBase.h @@ -174,7 +174,7 @@ public: items.emplace_back(std::make_shared(allocObject(), *this)); } - inline size_t size() + size_t size() { std::lock_guard lock(mutex); return items.size(); diff --git a/src/Common/RadixSort.h b/src/Common/RadixSort.h index a30e19d8212..238321ec76e 100644 --- a/src/Common/RadixSort.h +++ b/src/Common/RadixSort.h @@ -385,7 +385,7 @@ private: * PASS is counted from least significant (0), so the first pass is NUM_PASSES - 1. */ template - static inline void radixSortMSDInternal(Element * arr, size_t size, size_t limit) + static void radixSortMSDInternal(Element * arr, size_t size, size_t limit) { /// The beginning of every i-1-th bucket. 0th element will be equal to 1st. /// Last element will point to array end. 
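As an aside on the SimdJSONBasicFormatter::number overloads touched above: they format into a small stack buffer with std::to_chars and then append exactly the characters produced. A standalone sketch of that pattern (plain C++17, not ClickHouse code):

    #include <charconv>
    #include <cstdio>

    int main()
    {
        char number_buffer[24];
        auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), 42.42);
        if (res.ec != std::errc())
            return 1;
        // res.ptr points one past the last character written; the buffer is not null-terminated.
        std::fwrite(number_buffer, 1, static_cast<size_t>(res.ptr - number_buffer), stdout);
        std::fputc('\n', stdout);
        return 0;
    }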
@@ -528,7 +528,7 @@ private: // A helper to choose sorting algorithm based on array length template - static inline void radixSortMSDInternalHelper(Element * arr, size_t size, size_t limit) + static void radixSortMSDInternalHelper(Element * arr, size_t size, size_t limit) { if (size <= INSERTION_SORT_THRESHOLD) insertionSortInternal(arr, size); diff --git a/src/Common/SpaceSaving.h b/src/Common/SpaceSaving.h index 7a740ae6c9b..81ac4e71e8c 100644 --- a/src/Common/SpaceSaving.h +++ b/src/Common/SpaceSaving.h @@ -131,12 +131,12 @@ public: ~SpaceSaving() { destroyElements(); } - inline size_t size() const + size_t size() const { return counter_list.size(); } - inline size_t capacity() const + size_t capacity() const { return m_capacity; } diff --git a/src/Common/ThreadProfileEvents.h b/src/Common/ThreadProfileEvents.h index 26aeab08302..0af3ccb4c80 100644 --- a/src/Common/ThreadProfileEvents.h +++ b/src/Common/ThreadProfileEvents.h @@ -107,7 +107,7 @@ struct RUsageCounters } private: - static inline UInt64 getClockMonotonic() + static UInt64 getClockMonotonic() { struct timespec ts; if (0 != clock_gettime(CLOCK_MONOTONIC, &ts)) diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h index 3a148983790..3f8e1927493 100644 --- a/src/Common/Volnitsky.h +++ b/src/Common/Volnitsky.h @@ -54,16 +54,16 @@ namespace VolnitskyTraits /// min haystack size to use main algorithm instead of fallback static constexpr size_t min_haystack_size_for_algorithm = 20000; - static inline bool isFallbackNeedle(const size_t needle_size, size_t haystack_size_hint = 0) + static bool isFallbackNeedle(const size_t needle_size, size_t haystack_size_hint = 0) { return needle_size < 2 * sizeof(Ngram) || needle_size >= std::numeric_limits::max() || (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm); } - static inline Ngram toNGram(const UInt8 * const pos) { return unalignedLoad(pos); } + static Ngram toNGram(const UInt8 * const pos) { return unalignedLoad(pos); } template - static inline bool putNGramASCIICaseInsensitive(const UInt8 * pos, int offset, Callback && putNGramBase) + static bool putNGramASCIICaseInsensitive(const UInt8 * pos, int offset, Callback && putNGramBase) { struct Chars { @@ -115,7 +115,7 @@ namespace VolnitskyTraits } template - static inline bool putNGramUTF8CaseInsensitive( + static bool putNGramUTF8CaseInsensitive( const UInt8 * pos, int offset, const UInt8 * begin, size_t size, Callback && putNGramBase) { const UInt8 * end = begin + size; @@ -349,7 +349,7 @@ namespace VolnitskyTraits } template - static inline bool putNGram(const UInt8 * pos, int offset, [[maybe_unused]] const UInt8 * begin, size_t size, Callback && putNGramBase) + static bool putNGram(const UInt8 * pos, int offset, [[maybe_unused]] const UInt8 * begin, size_t size, Callback && putNGramBase) { if constexpr (CaseSensitive) { @@ -580,7 +580,7 @@ public: return true; } - inline bool searchOne(const UInt8 * haystack, const UInt8 * haystack_end) const + bool searchOne(const UInt8 * haystack, const UInt8 * haystack_end) const { const size_t fallback_size = fallback_needles.size(); for (size_t i = 0; i < fallback_size; ++i) @@ -609,7 +609,7 @@ public: return false; } - inline size_t searchOneFirstIndex(const UInt8 * haystack, const UInt8 * haystack_end) const + size_t searchOneFirstIndex(const UInt8 * haystack, const UInt8 * haystack_end) const { const size_t fallback_size = fallback_needles.size(); @@ -647,7 +647,7 @@ public: } template - inline UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 
* haystack_end, const CountCharsCallback & count_chars) const + UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & count_chars) const { const size_t fallback_size = fallback_needles.size(); @@ -682,7 +682,7 @@ public: } template - inline void searchOneAll(const UInt8 * haystack, const UInt8 * haystack_end, AnsType * answer, const CountCharsCallback & count_chars) const + void searchOneAll(const UInt8 * haystack, const UInt8 * haystack_end, AnsType * answer, const CountCharsCallback & count_chars) const { const size_t fallback_size = fallback_needles.size(); for (size_t i = 0; i < fallback_size; ++i) diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index ec49c94808e..ddd30c4eef2 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -491,12 +491,12 @@ public: incrementErrorMetrics(code); } - inline static Exception createDeprecated(const std::string & msg, Error code_) + static Exception createDeprecated(const std::string & msg, Error code_) { return Exception(msg, code_, 0); } - inline static Exception fromPath(Error code_, const std::string & path) + static Exception fromPath(Error code_, const std::string & path) { return Exception(code_, "Coordination error: {}, path {}", errorMessage(code_), path); } @@ -504,7 +504,7 @@ public: /// Message must be a compile-time constant template requires std::is_convertible_v - inline static Exception fromMessage(Error code_, T && message) + static Exception fromMessage(Error code_, T && message) { return Exception(std::forward(message), code_); } diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 2185d32e47a..ed7498b1ac9 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -1259,11 +1259,13 @@ void ZooKeeper::initFeatureFlags() void ZooKeeper::executeGenericRequest( const ZooKeeperRequestPtr & request, - ResponseCallback callback) + ResponseCallback callback, + WatchCallbackPtr watch) { RequestInfo request_info; request_info.request = request; request_info.callback = callback; + request_info.watch = watch; pushRequest(std::move(request_info)); } diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index cf331a03d06..8fdf0f97d9d 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -139,7 +139,8 @@ public: void executeGenericRequest( const ZooKeeperRequestPtr & request, - ResponseCallback callback); + ResponseCallback callback, + WatchCallbackPtr watch = nullptr); /// See the documentation about semantics of these methods in IKeeper class. 
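The gtest_async_loader.cpp updates that follow reflect the AsyncLoader::wait() change earlier in this diff: instead of rethrowing the job's own exception, wait() now throws ASYNC_LOAD_WAIT_FAILED with the original error embedded in the message. A hedged caller-side sketch (assumes the ClickHouse headers; `loader` and `job` are illustrative names, and the job is assumed to have been canceled):

    // Sketch only; mirrors the assertions in the updated tests below.
    try
    {
        loader.wait(job); // throws because the job did not finish successfully
    }
    catch (const DB::Exception & e)
    {
        // e.code() is now always ErrorCodes::ASYNC_LOAD_WAIT_FAILED, and e.message()
        // carries the original reason, e.g. "ASYNC_LOAD_CANCELED" or the failed
        // job's own error text.
    }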
diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 174997ddf14..9fda58b9008 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -35,6 +35,7 @@ namespace DB::ErrorCodes extern const int ASYNC_LOAD_CYCLE; extern const int ASYNC_LOAD_FAILED; extern const int ASYNC_LOAD_CANCELED; + extern const int ASYNC_LOAD_WAIT_FAILED; } struct Initializer { @@ -262,7 +263,8 @@ TEST(AsyncLoader, CancelPendingJob) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } } @@ -288,7 +290,8 @@ TEST(AsyncLoader, CancelPendingTask) } catch (Exception & e) { - ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } try @@ -298,7 +301,8 @@ TEST(AsyncLoader, CancelPendingTask) } catch (Exception & e) { - ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } } @@ -325,7 +329,8 @@ TEST(AsyncLoader, CancelPendingDependency) } catch (Exception & e) { - ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } try @@ -335,7 +340,8 @@ TEST(AsyncLoader, CancelPendingDependency) } catch (Exception & e) { - ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } } @@ -451,8 +457,9 @@ TEST(AsyncLoader, JobFailure) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_FAILED); - ASSERT_TRUE(e.message().find(error_message) != String::npos); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains(error_message)); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_FAILED")); } } @@ -489,8 +496,9 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); - ASSERT_TRUE(e.message().find(error_message) != String::npos); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); + ASSERT_TRUE(e.message().contains(error_message)); } try { @@ -499,8 +507,9 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); - ASSERT_TRUE(e.message().find(error_message) != String::npos); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); + ASSERT_TRUE(e.message().contains(error_message)); } } @@ -531,7 +540,8 @@ TEST(AsyncLoader, ScheduleJobWithCanceledDependencies) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } try { @@ -540,7 +550,8 @@ TEST(AsyncLoader, ScheduleJobWithCanceledDependencies) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } } diff --git 
a/src/Common/tryGetFileNameByFileDescriptor.cpp b/src/Common/tryGetFileNameByFileDescriptor.cpp new file mode 100644 index 00000000000..47e81050388 --- /dev/null +++ b/src/Common/tryGetFileNameByFileDescriptor.cpp @@ -0,0 +1,33 @@ +#include + +#ifdef OS_LINUX +# include +#elif defined(OS_DARWIN) +# include +#endif + +#include + + +namespace DB +{ +std::optional tryGetFileNameFromFileDescriptor(int fd) +{ +#ifdef OS_LINUX + std::string proc_path = fmt::format("/proc/self/fd/{}", fd); + char file_path[PATH_MAX] = {'\0'}; + if (readlink(proc_path.c_str(), file_path, sizeof(file_path) - 1) != -1) + return file_path; + return std::nullopt; +#elif defined(OS_DARWIN) + char file_path[PATH_MAX] = {'\0'}; + if (fcntl(fd, F_GETPATH, file_path) != -1) + return file_path; + return std::nullopt; +#else + (void)fd; + return std::nullopt; +#endif +} + +} diff --git a/src/Common/tryGetFileNameByFileDescriptor.h b/src/Common/tryGetFileNameByFileDescriptor.h new file mode 100644 index 00000000000..c38ccb4f851 --- /dev/null +++ b/src/Common/tryGetFileNameByFileDescriptor.h @@ -0,0 +1,10 @@ +#pragma once + +#include +#include + +namespace DB +{ +/// Supports only Linux/MacOS. On other platforms, returns nullopt. +std::optional tryGetFileNameFromFileDescriptor(int fd); +} diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index bae6328a328..4b14b038852 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -145,9 +146,10 @@ struct ContextSharedPart : boost::noncopyable mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes + std::optional storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage + mutable std::mutex keeper_dispatcher_mutex; mutable std::shared_ptr keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex); - }; ContextData::ContextData() = default; @@ -453,6 +455,19 @@ std::shared_ptr Context::getZooKeeper() const throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper"); } +const StorageS3Settings & Context::getStorageS3Settings() const +{ + std::lock_guard lock(shared->mutex); + + if (!shared->storage_s3_settings) + { + const auto & config = shared->config ? *shared->config : Poco::Util::Application::instance().config(); + shared->storage_s3_settings.emplace().loadFromConfig("s3", config, getSettingsRef()); + } + + return *shared->storage_s3_settings; +} + const ServerSettings & Context::getServerSettings() const { return shared->server_settings; diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h index 3df3649c498..7e4d1794f7d 100644 --- a/src/Coordination/Standalone/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -37,6 +37,7 @@ class FilesystemCacheLog; class FilesystemReadPrefetchesLog; class BlobStorageLog; class IOUringReader; +class StorageS3Settings; /// A small class which owns ContextShared. /// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete. 
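The new `tryGetFileNameFromFileDescriptor` helper above resolves a file descriptor back to a path via `/proc/self/fd/<fd>` on Linux and `fcntl(F_GETPATH)` on macOS. A standalone sketch of the same technique, using plain standard-library types and `__linux__`/`__APPLE__` in place of the project's `OS_LINUX`/`OS_DARWIN` macros:

```cpp
#include <cstdio>
#include <limits.h>   // PATH_MAX
#include <optional>
#include <string>
#include <unistd.h>   // readlink, STDIN_FILENO
#if defined(__APPLE__)
#include <fcntl.h>    // fcntl, F_GETPATH
#endif

// On Linux, /proc/self/fd/<fd> is a symlink to the opened file; on macOS,
// fcntl(F_GETPATH) fills the buffer directly. Elsewhere, give up.
std::optional<std::string> tryGetFileNameFromFileDescriptor(int fd)
{
#if defined(__linux__)
    std::string proc_path = "/proc/self/fd/" + std::to_string(fd);
    char file_path[PATH_MAX] = {'\0'};
    if (readlink(proc_path.c_str(), file_path, sizeof(file_path) - 1) != -1)
        return std::string(file_path);
    return std::nullopt;
#elif defined(__APPLE__)
    char file_path[PATH_MAX] = {'\0'};
    if (fcntl(fd, F_GETPATH, file_path) != -1)
        return std::string(file_path);
    return std::nullopt;
#else
    (void)fd;
    return std::nullopt;
#endif
}

int main()
{
    if (auto name = tryGetFileNameFromFileDescriptor(STDIN_FILENO))
        std::printf("stdin is %s\n", name->c_str());
    else
        std::printf("stdin has no resolvable path\n");
}
```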
@@ -162,6 +163,10 @@ public: zkutil::ZooKeeperPtr getZooKeeper() const; + const StorageS3Settings & getStorageS3Settings() const; + + const String & getUserName() const { static std::string user; return user; } + const ServerSettings & getServerSettings() const; bool hasTraceCollector() const; diff --git a/src/Core/Field.h b/src/Core/Field.h index 4424d669c4d..73d3f4ec44e 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -855,13 +855,13 @@ template <> struct Field::EnumToType { usi template <> struct Field::EnumToType { using Type = CustomType; }; template <> struct Field::EnumToType { using Type = UInt64; }; -inline constexpr bool isInt64OrUInt64FieldType(Field::Types::Which t) +constexpr bool isInt64OrUInt64FieldType(Field::Types::Which t) { return t == Field::Types::Int64 || t == Field::Types::UInt64; } -inline constexpr bool isInt64OrUInt64orBoolFieldType(Field::Types::Which t) +constexpr bool isInt64OrUInt64orBoolFieldType(Field::Types::Which t) { return t == Field::Types::Int64 || t == Field::Types::UInt64 diff --git a/src/Core/Joins.h b/src/Core/Joins.h index ccdd6eefab7..96d2b51325c 100644 --- a/src/Core/Joins.h +++ b/src/Core/Joins.h @@ -19,16 +19,16 @@ enum class JoinKind : uint8_t const char * toString(JoinKind kind); -inline constexpr bool isLeft(JoinKind kind) { return kind == JoinKind::Left; } -inline constexpr bool isRight(JoinKind kind) { return kind == JoinKind::Right; } -inline constexpr bool isInner(JoinKind kind) { return kind == JoinKind::Inner; } -inline constexpr bool isFull(JoinKind kind) { return kind == JoinKind::Full; } -inline constexpr bool isCrossOrComma(JoinKind kind) { return kind == JoinKind::Comma || kind == JoinKind::Cross; } -inline constexpr bool isRightOrFull(JoinKind kind) { return kind == JoinKind::Right || kind == JoinKind::Full; } -inline constexpr bool isLeftOrFull(JoinKind kind) { return kind == JoinKind::Left || kind == JoinKind::Full; } -inline constexpr bool isInnerOrRight(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Right; } -inline constexpr bool isInnerOrLeft(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Left; } -inline constexpr bool isPaste(JoinKind kind) { return kind == JoinKind::Paste; } +constexpr bool isLeft(JoinKind kind) { return kind == JoinKind::Left; } +constexpr bool isRight(JoinKind kind) { return kind == JoinKind::Right; } +constexpr bool isInner(JoinKind kind) { return kind == JoinKind::Inner; } +constexpr bool isFull(JoinKind kind) { return kind == JoinKind::Full; } +constexpr bool isCrossOrComma(JoinKind kind) { return kind == JoinKind::Comma || kind == JoinKind::Cross; } +constexpr bool isRightOrFull(JoinKind kind) { return kind == JoinKind::Right || kind == JoinKind::Full; } +constexpr bool isLeftOrFull(JoinKind kind) { return kind == JoinKind::Left || kind == JoinKind::Full; } +constexpr bool isInnerOrRight(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Right; } +constexpr bool isInnerOrLeft(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Left; } +constexpr bool isPaste(JoinKind kind) { return kind == JoinKind::Paste; } /// Allows more optimal JOIN for typical cases. 
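These `Join*` helpers drop the `inline` keyword because a `constexpr` free function is already implicitly `inline`, so nothing changes for the linker or for header inclusion. A tiny compile-time check illustrating the equivalence; the enum is trimmed to a few members for brevity:

```cpp
#include <cstdint>

enum class JoinKind : std::uint8_t { Inner, Left, Right, Full };

// A constexpr free function is implicitly inline, so `inline constexpr` and
// plain `constexpr` are equivalent here and the explicit specifier can go.
constexpr bool isLeft(JoinKind kind) { return kind == JoinKind::Left; }
constexpr bool isLeftOrFull(JoinKind kind) { return kind == JoinKind::Left || kind == JoinKind::Full; }

static_assert(isLeft(JoinKind::Left));
static_assert(!isLeft(JoinKind::Right));
static_assert(isLeftOrFull(JoinKind::Full));

int main() {}
```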
enum class JoinStrictness : uint8_t @@ -66,7 +66,7 @@ enum class ASOFJoinInequality : uint8_t const char * toString(ASOFJoinInequality asof_join_inequality); -inline constexpr ASOFJoinInequality getASOFJoinInequality(std::string_view func_name) +constexpr ASOFJoinInequality getASOFJoinInequality(std::string_view func_name) { ASOFJoinInequality inequality = ASOFJoinInequality::None; @@ -82,7 +82,7 @@ inline constexpr ASOFJoinInequality getASOFJoinInequality(std::string_view func_ return inequality; } -inline constexpr ASOFJoinInequality reverseASOFJoinInequality(ASOFJoinInequality inequality) +constexpr ASOFJoinInequality reverseASOFJoinInequality(ASOFJoinInequality inequality) { if (inequality == ASOFJoinInequality::Less) return ASOFJoinInequality::Greater; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 524d6ec07c2..45f235116ab 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -3,6 +3,7 @@ #include #include +#include namespace Poco::Util @@ -51,7 +52,7 @@ namespace DB M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0) \ M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \ M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \ - M(Bool, aggregate_function_group_array_has_limit_size, false, "When the max array element size is exceeded, a `Too large array size` exception will be thrown by default. When set to true, no exception will be thrown, and the excess elements will be discarded.", 0) \ + M(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \ M(UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0) \ M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0) \ M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. 
Zero means unlimited.", 0) \ @@ -97,6 +98,8 @@ namespace DB M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ M(UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0) \ + M(UInt64, max_view_num_to_warn, 10000lu, "If number of views is greater than this value, server will create a warning that will displayed to user.", 0) \ + M(UInt64, max_dictionary_num_to_warn, 1000lu, "If number of dictionaries is greater than this value, server will create a warning that will displayed to user.", 0) \ M(UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \ M(UInt64, max_part_num_to_warn, 100000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \ M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 21af27cc60b..f0389e7e2d5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -116,6 +116,12 @@ class IColumn; M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \ M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \ M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ + M(Bool, hdfs_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ + M(Bool, azure_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ + M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in S3 table engine", 0) \ + M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ + M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ + M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. 
Fail if a single TCP read or write call blocks for this long.", 0) \ @@ -128,6 +134,7 @@ class IColumn; M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \ M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \ M(Bool, hdfs_skip_empty_files, false, "Allow to skip empty files in hdfs table engine", 0) \ + M(Bool, azure_skip_empty_files, false, "Allow to skip empty files in azure table engine", 0) \ M(UInt64, hsts_max_age, 0, "Expired time for hsts. 0 means disable HSTS.", 0) \ M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \ M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \ @@ -881,6 +888,7 @@ class IColumn; M(Bool, traverse_shadow_remote_data_paths, false, "Traverse shadow directory when query system.remote_data_paths", 0) \ M(Bool, geo_distance_returns_float64_on_float64_arguments, true, "If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32.", 0) \ M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \ + M(Bool, cast_string_to_dynamic_use_inference, false, "Use types inference during String to Dynamic conversion", 0) \ \ /** Experimental functions */ \ M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ @@ -889,6 +897,7 @@ class IColumn; M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \ + M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \ M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \ M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ @@ -1014,6 +1023,7 @@ class IColumn; M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \ M(Bool, input_format_parquet_preserve_order, false, "Avoid reordering rows when reading from Parquet files. 
Usually makes it much slower.", 0) \ M(Bool, input_format_parquet_filter_push_down, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata.", 0) \ + M(Bool, input_format_parquet_use_native_reader, false, "When reading Parquet files, to use native reader instead of arrow reader.", 0) \ M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \ M(Bool, input_format_orc_allow_missing_columns, true, "Allow missing columns while reading ORC input formats", 0) \ M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \ @@ -1079,6 +1089,7 @@ class IColumn; M(Bool, input_format_csv_skip_trailing_empty_lines, false, "Skip trailing empty lines in CSV format", 0) \ M(Bool, input_format_tsv_skip_trailing_empty_lines, false, "Skip trailing empty lines in TSV format", 0) \ M(Bool, input_format_custom_skip_trailing_empty_lines, false, "Skip trailing empty lines in CustomSeparated format", 0) \ + M(Bool, input_format_tsv_crlf_end_of_line, false, "If it is set true, file function will read TSV format with \\r\\n instead of \\n.", 0) \ \ M(Bool, input_format_native_allow_types_conversion, true, "Allow data types conversion in Native input format", 0) \ \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index d512e3bc3ae..66341876912 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,14 +85,26 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.6", {{"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, + {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, + {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, + {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, + {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, + {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, + {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, + {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, + }}, {"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"}, {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. 
t1.y < t2.y."}, + {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, - {"cross_join_min_rows_to_compress", 0, 10000000, "A new setting."}, - {"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."}, + {"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, + {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, {"http_max_chunk_size", 0, 0, "Internal limitation"}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, + {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, + {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 0caf6e8d609..05985316566 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -229,4 +229,9 @@ IMPLEMENT_SETTING_ENUM(SQLSecurityType, ErrorCodes::BAD_ARGUMENTS, {{"DEFINER", SQLSecurityType::DEFINER}, {"INVOKER", SQLSecurityType::INVOKER}, {"NONE", SQLSecurityType::NONE}}) + +IMPLEMENT_SETTING_ENUM( + GroupArrayActionWhenLimitReached, + ErrorCodes::BAD_ARGUMENTS, + {{"throw", GroupArrayActionWhenLimitReached::THROW}, {"discard", GroupArrayActionWhenLimitReached::DISCARD}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index ab163ba96a3..575cd8700c8 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -370,4 +370,12 @@ DECLARE_SETTING_ENUM(SchemaInferenceMode) DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOverflowBehavior, FormatSettings::DateTimeOverflowBehavior) DECLARE_SETTING_ENUM(SQLSecurityType) + +enum class GroupArrayActionWhenLimitReached : uint8_t +{ + THROW, + DISCARD +}; +DECLARE_SETTING_ENUM(GroupArrayActionWhenLimitReached) + } diff --git a/src/Core/TypeId.h b/src/Core/TypeId.h index e6c5454b3bb..e4f850cbb59 100644 --- a/src/Core/TypeId.h +++ b/src/Core/TypeId.h @@ -50,6 +50,7 @@ enum class TypeIndex : uint8_t IPv6, JSONPaths, Variant, + Dynamic }; /** diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index a0f47c44460..3d34d404595 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -40,7 +40,7 @@ class BaseDaemon : public Poco::Util::ServerApplication, public Loggers friend class SignalListener; public: - static inline constexpr char DEFAULT_GRAPHITE_CONFIG_NAME[] = "graphite"; + static constexpr char DEFAULT_GRAPHITE_CONFIG_NAME[] = "graphite"; BaseDaemon(); ~BaseDaemon() override; diff --git a/src/DataTypes/DataTypeArray.cpp b/src/DataTypes/DataTypeArray.cpp index 6e5760933eb..806a1577a21 100644 --- 
a/src/DataTypes/DataTypeArray.cpp +++ b/src/DataTypes/DataTypeArray.cpp @@ -75,6 +75,27 @@ void DataTypeArray::forEachChild(const ChildCallback & callback) const nested->forEachChild(callback); } +std::unique_ptr DataTypeArray::getDynamicSubcolumnData(std::string_view subcolumn_name, const DB::IDataType::SubstreamData & data, bool throw_if_null) const +{ + auto nested_type = assert_cast(*data.type).nested; + auto nested_data = std::make_unique(nested_type->getDefaultSerialization()); + nested_data->type = nested_type; + nested_data->column = data.column ? assert_cast(*data.column).getDataPtr() : nullptr; + + auto nested_subcolumn_data = nested_type->getSubcolumnData(subcolumn_name, *nested_data, throw_if_null); + if (!nested_subcolumn_data) + return nullptr; + + auto creator = SerializationArray::SubcolumnCreator(data.column ? assert_cast(*data.column).getOffsetsPtr() : nullptr); + auto res = std::make_unique(); + res->serialization = creator.create(nested_subcolumn_data->serialization); + res->type = creator.create(nested_subcolumn_data->type); + if (data.column) + res->column = creator.create(nested_subcolumn_data->column); + + return res; +} + static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.size() != 1) diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h index 4423f137e1a..b242d871c36 100644 --- a/src/DataTypes/DataTypeArray.h +++ b/src/DataTypes/DataTypeArray.h @@ -55,7 +55,12 @@ public: bool textCanContainOnlyValidUTF8() const override { return nested->textCanContainOnlyValidUTF8(); } bool isComparable() const override { return nested->isComparable(); } bool canBeComparedWithCollation() const override { return nested->canBeComparedWithCollation(); } - bool hasDynamicSubcolumns() const override { return nested->hasDynamicSubcolumns(); } + bool hasDynamicSubcolumnsDeprecated() const override { return nested->hasDynamicSubcolumnsDeprecated(); } + + /// Array column doesn't have subcolumns by itself but allows to read subcolumns of nested column. + /// If nested column has dynamic subcolumns, Array of this type should also be able to read these dynamic subcolumns. 
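The new `DataTypeArray::getDynamicSubcolumnData` forwards the lookup to the nested type and re-wraps the result with the array offsets, so a column like `Array(Dynamic)` can serve type subcolumns of its elements. The toy model below shows only that delegation shape with plain strings; it is not the real `IDataType` interface, and the `Nullable(...)` wrapping mimics the `makeNullableOrLowCardinalityNullableSafe` call used later in the patch.

```cpp
#include <iostream>
#include <memory>
#include <optional>
#include <string>

// Toy model: Array has no subcolumns of its own, but forwards the lookup to
// its nested type and re-wraps whatever that lookup yields.
struct Type
{
    virtual ~Type() = default;
    virtual std::string name() const = 0;
    virtual std::optional<std::string> tryGetSubcolumnType(const std::string &) const { return std::nullopt; }
};

struct DynamicLikeType : Type
{
    std::string name() const override { return "Dynamic"; }
    // Any requested type name is served as a nullable subcolumn.
    std::optional<std::string> tryGetSubcolumnType(const std::string & subcolumn) const override
    {
        return "Nullable(" + subcolumn + ")";
    }
};

struct ArrayType : Type
{
    std::shared_ptr<Type> nested;
    explicit ArrayType(std::shared_ptr<Type> nested_) : nested(std::move(nested_)) {}
    std::string name() const override { return "Array(" + nested->name() + ")"; }
    std::optional<std::string> tryGetSubcolumnType(const std::string & subcolumn) const override
    {
        if (auto inner = nested->tryGetSubcolumnType(subcolumn))
            return "Array(" + *inner + ")";  // wrap the nested result back into an array
        return std::nullopt;
    }
};

int main()
{
    ArrayType arr(std::make_shared<DynamicLikeType>());
    std::cout << arr.name() << " subcolumn UInt64 -> "
              << arr.tryGetSubcolumnType("UInt64").value_or("<none>") << "\n";
    // prints: Array(Dynamic) subcolumn UInt64 -> Array(Nullable(UInt64))
}
```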
+ bool hasDynamicSubcolumnsData() const override { return nested->hasDynamicSubcolumnsData(); } + std::unique_ptr getDynamicSubcolumnData(std::string_view subcolumn_name, const SubstreamData & data, bool throw_if_null) const override; bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { diff --git a/src/DataTypes/DataTypeDecimalBase.h b/src/DataTypes/DataTypeDecimalBase.h index 642d2de833f..997c554059b 100644 --- a/src/DataTypes/DataTypeDecimalBase.h +++ b/src/DataTypes/DataTypeDecimalBase.h @@ -147,7 +147,7 @@ public: static T getScaleMultiplier(UInt32 scale); - inline DecimalUtils::DataTypeDecimalTrait getTrait() const + DecimalUtils::DataTypeDecimalTrait getTrait() const { return {precision, scale}; } diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp new file mode 100644 index 00000000000..c920e69c13b --- /dev/null +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int UNEXPECTED_AST_STRUCTURE; +} + +DataTypeDynamic::DataTypeDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) +{ +} + +MutableColumnPtr DataTypeDynamic::createColumn() const +{ + return ColumnDynamic::create(max_dynamic_types); +} + +String DataTypeDynamic::doGetName() const +{ + if (max_dynamic_types == DEFAULT_MAX_DYNAMIC_TYPES) + return "Dynamic"; + return "Dynamic(max_types=" + toString(max_dynamic_types) + ")"; +} + +Field DataTypeDynamic::getDefault() const +{ + return Field(Null()); +} + +SerializationPtr DataTypeDynamic::doGetDefaultSerialization() const +{ + return std::make_shared(max_dynamic_types); +} + +static DataTypePtr create(const ASTPtr & arguments) +{ + if (!arguments || arguments->children.empty()) + return std::make_shared(); + + if (arguments->children.size() > 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Dynamic data type can have only one optional argument - the maximum number of dynamic types in a form 'Dynamic(max_types=N)"); + + + const auto * argument = arguments->children[0]->as(); + if (!argument || argument->name != "equals") + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Dynamic data type argument should be in a form 'max_types=N'"); + + auto identifier_name = argument->arguments->children[0]->as()->name(); + if (identifier_name != "max_types") + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected identifier: {}. 
Dynamic data type argument should be in a form 'max_types=N'", identifier_name); + + auto * literal = argument->arguments->children[1]->as(); + + if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() == 0 || literal->value.get() > 255) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 1 and 255"); + + return std::make_shared(literal->value.get()); +} + +void registerDataTypeDynamic(DataTypeFactory & factory) +{ + factory.registerDataType("Dynamic", create); +} + +std::unique_ptr DataTypeDynamic::getDynamicSubcolumnData(std::string_view subcolumn_name, const DB::IDataType::SubstreamData & data, bool throw_if_null) const +{ + auto [subcolumn_type_name, subcolumn_nested_name] = Nested::splitName(subcolumn_name); + /// Check if requested subcolumn is a valid data type. + auto subcolumn_type = DataTypeFactory::instance().tryGet(String(subcolumn_type_name)); + if (!subcolumn_type) + { + if (throw_if_null) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Dynamic type doesn't have subcolumn '{}'", subcolumn_type_name); + return nullptr; + } + + std::unique_ptr res = std::make_unique(subcolumn_type->getDefaultSerialization()); + res->type = subcolumn_type; + std::optional discriminator; + if (data.column) + { + /// If column was provided, we should extract subcolumn from Dynamic column. + const auto & dynamic_column = assert_cast(*data.column); + const auto & variant_info = dynamic_column.getVariantInfo(); + /// Check if provided Dynamic column has subcolumn of this type. + auto it = variant_info.variant_name_to_discriminator.find(subcolumn_type->getName()); + if (it != variant_info.variant_name_to_discriminator.end()) + { + discriminator = it->second; + res->column = dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(*discriminator); + } + } + + /// Extract nested subcolumn of requested dynamic subcolumn if needed. + if (!subcolumn_nested_name.empty()) + { + res = getSubcolumnData(subcolumn_nested_name, *res, throw_if_null); + if (!res) + return nullptr; + } + + res->serialization = std::make_shared(res->serialization, subcolumn_type->getName()); + res->type = makeNullableOrLowCardinalityNullableSafe(res->type); + if (data.column) + { + if (discriminator) + { + /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator to + /// create full subcolumn from variant according to discriminators. + const auto & variant_column = assert_cast(*data.column).getVariantColumn(); + auto creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), "", *discriminator, variant_column.localDiscriminatorByGlobal(*discriminator)); + res->column = creator.create(res->column); + } + else + { + /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values. + auto column = res->type->createColumn(); + column->insertManyDefaults(data.column->size()); + res->column = std::move(column); + } + } + + return res; +} + +} diff --git a/src/DataTypes/DataTypeDynamic.h b/src/DataTypes/DataTypeDynamic.h new file mode 100644 index 00000000000..d5e4c5261ce --- /dev/null +++ b/src/DataTypes/DataTypeDynamic.h @@ -0,0 +1,55 @@ +#pragma once + +#include + +namespace DB +{ + +/// Dynamic type allows to store values of any type inside it and to read +/// subcolumns with any type without knowing all of them in advance. 
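The `create()` function above accepts an optional `max_types=N` argument and rejects values outside 1..255, while `doGetName` hides the default value from the printed type name. A small sketch of just those naming and validation rules, assuming the default of 32 declared in `DataTypeDynamic.h`; `dynamicTypeName` is a made-up helper, not part of the patch:

```cpp
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>

// Default value as declared in DataTypeDynamic.h below.
constexpr std::size_t DEFAULT_MAX_DYNAMIC_TYPES = 32;

std::string dynamicTypeName(std::size_t max_types)
{
    if (max_types == 0 || max_types > 255)
        throw std::invalid_argument("'max_types' argument for Dynamic type should be a positive integer between 1 and 255");
    if (max_types == DEFAULT_MAX_DYNAMIC_TYPES)
        return "Dynamic";                                          // default is hidden from the name
    return "Dynamic(max_types=" + std::to_string(max_types) + ")";
}

int main()
{
    std::cout << dynamicTypeName(32) << "\n";   // Dynamic
    std::cout << dynamicTypeName(4) << "\n";    // Dynamic(max_types=4)
    try { dynamicTypeName(300); }
    catch (const std::exception & e) { std::cout << "rejected: " << e.what() << "\n"; }
}
```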
+class DataTypeDynamic final : public IDataType +{ +public: + static constexpr bool is_parametric = true; + + explicit DataTypeDynamic(size_t max_dynamic_types_ = DEFAULT_MAX_DYNAMIC_TYPES); + + TypeIndex getTypeId() const override { return TypeIndex::Dynamic; } + const char * getFamilyName() const override { return "Dynamic"; } + + bool isParametric() const override { return true; } + bool canBeInsideNullable() const override { return false; } + bool supportsSparseSerialization() const override { return false; } + bool canBeInsideSparseColumns() const override { return false; } + bool isComparable() const override { return true; } + + MutableColumnPtr createColumn() const override; + + Field getDefault() const override; + + /// 2 Dynamic types with different max_dynamic_types parameters are considered as different. + bool equals(const IDataType & rhs) const override + { + if (const auto * rhs_dynamic_type = typeid_cast(&rhs)) + return max_dynamic_types == rhs_dynamic_type->max_dynamic_types; + return false; + } + + bool haveSubtypes() const override { return false; } + + bool hasDynamicSubcolumnsData() const override { return true; } + std::unique_ptr getDynamicSubcolumnData(std::string_view subcolumn_name, const SubstreamData & data, bool throw_if_null) const override; + + size_t getMaxDynamicTypes() const { return max_dynamic_types; } + +private: + static constexpr size_t DEFAULT_MAX_DYNAMIC_TYPES = 32; + + SerializationPtr doGetDefaultSerialization() const override; + String doGetName() const override; + + size_t max_dynamic_types; +}; + +} + diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 427af090b91..8c8f9999ada 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -292,6 +292,7 @@ DataTypeFactory::DataTypeFactory() registerDataTypeMap(*this); registerDataTypeObject(*this); registerDataTypeVariant(*this); + registerDataTypeDynamic(*this); } DataTypeFactory & DataTypeFactory::instance() diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index 4727cb3ae5c..86e0203358d 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -100,5 +100,6 @@ void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory); void registerDataTypeDomainGeo(DataTypeFactory & factory); void registerDataTypeObject(DataTypeFactory & factory); void registerDataTypeVariant(DataTypeFactory & factory); +void registerDataTypeDynamic(DataTypeFactory & factory); } diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 7281cca1bb1..4866c3e78cc 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -42,7 +42,7 @@ public: bool isComparable() const override { return key_type->isComparable() && value_type->isComparable(); } bool isParametric() const override { return true; } bool haveSubtypes() const override { return true; } - bool hasDynamicSubcolumns() const override { return nested->hasDynamicSubcolumns(); } + bool hasDynamicSubcolumnsDeprecated() const override { return nested->hasDynamicSubcolumnsDeprecated(); } const DataTypePtr & getKeyType() const { return key_type; } const DataTypePtr & getValueType() const { return value_type; } diff --git a/src/DataTypes/DataTypeObject.h b/src/DataTypes/DataTypeObject.h index 937a9091371..c610a1a8ba4 100644 --- a/src/DataTypes/DataTypeObject.h +++ b/src/DataTypes/DataTypeObject.h @@ -36,7 +36,7 @@ public: bool haveSubtypes() const override { return false; } bool equals(const IDataType & 
rhs) const override; bool isParametric() const override { return true; } - bool hasDynamicSubcolumns() const override { return true; } + bool hasDynamicSubcolumnsDeprecated() const override { return true; } SerializationPtr doGetDefaultSerialization() const override; diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index ebee096613d..6e32ed586ea 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -291,9 +291,9 @@ bool DataTypeTuple::haveMaximumSizeOfValue() const return std::all_of(elems.begin(), elems.end(), [](auto && elem) { return elem->haveMaximumSizeOfValue(); }); } -bool DataTypeTuple::hasDynamicSubcolumns() const +bool DataTypeTuple::hasDynamicSubcolumnsDeprecated() const { - return std::any_of(elems.begin(), elems.end(), [](auto && elem) { return elem->hasDynamicSubcolumns(); }); + return std::any_of(elems.begin(), elems.end(), [](auto && elem) { return elem->hasDynamicSubcolumnsDeprecated(); }); } bool DataTypeTuple::isComparable() const diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index 15561fe4286..fd00fce5a17 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -52,7 +52,7 @@ public: bool isComparable() const override; bool textCanContainOnlyValidUTF8() const override; bool haveMaximumSizeOfValue() const override; - bool hasDynamicSubcolumns() const override; + bool hasDynamicSubcolumnsDeprecated() const override; size_t getMaximumSizeOfValueInMemory() const override; size_t getSizeOfValueInMemory() const override; diff --git a/src/DataTypes/DataTypeVariant.cpp b/src/DataTypes/DataTypeVariant.cpp index 5989da90937..8a10ca7d06d 100644 --- a/src/DataTypes/DataTypeVariant.cpp +++ b/src/DataTypes/DataTypeVariant.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -18,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int EMPTY_DATA_PASSED; } @@ -33,6 +31,9 @@ DataTypeVariant::DataTypeVariant(const DataTypes & variants_) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Nullable/LowCardinality(Nullable) types are not allowed inside Variant type"); if (type->getTypeId() == TypeIndex::Variant) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Nested Variant types are not allowed"); + if (type->getTypeId() == TypeIndex::Dynamic) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dynamic type is not allowed inside Variant type"); + /// Don't use Nothing type as a variant. if (!isNothing(type)) name_to_type[type->getName()] = type; @@ -42,9 +43,6 @@ DataTypeVariant::DataTypeVariant(const DataTypes & variants_) for (const auto & [_, type] : name_to_type) variants.push_back(type); - if (variants.empty()) - throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Variant cannot be empty"); - if (variants.size() > ColumnVariant::MAX_NESTED_COLUMNS) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Variant type with more than {} nested types is not allowed", ColumnVariant::MAX_NESTED_COLUMNS); } @@ -113,9 +111,16 @@ bool DataTypeVariant::equals(const IDataType & rhs) const return false; for (size_t i = 0; i < size; ++i) + { if (!variants[i]->equals(*rhs_variant.variants[i])) return false; + /// The same data types with different custom names considered different. + /// For example, UInt8 and Bool. 
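The extra name comparison in `DataTypeVariant::equals` exists because two variants can share a physical representation yet differ by custom name, e.g. `Bool` over `UInt8`. A minimal illustration of why name-blind equality would conflate them; these are toy structs, not the real type system:

```cpp
#include <cassert>
#include <string>

// Two types may share a physical representation yet carry different names,
// so equality must compare names as well.
struct TypeDesc
{
    std::string physical;  // underlying representation
    std::string name;      // possibly a custom name on top of it
};

bool equalsIgnoringNames(const TypeDesc & a, const TypeDesc & b)
{
    return a.physical == b.physical;
}

bool equalsRespectingNames(const TypeDesc & a, const TypeDesc & b)
{
    return a.physical == b.physical && a.name == b.name;
}

int main()
{
    TypeDesc uint8{"UInt8", "UInt8"};
    TypeDesc boolean{"UInt8", "Bool"};   // Bool is UInt8 with a custom name

    assert(equalsIgnoringNames(uint8, boolean));     // representation-only comparison conflates them
    assert(!equalsRespectingNames(uint8, boolean));  // name-aware comparison keeps them distinct
}
```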
+ if ((variants[i]->hasCustomName() || rhs_variant.variants[i]) && variants[i]->getName() != rhs_variant.variants[i]->getName()) + return false; + } + return true; } @@ -129,17 +134,15 @@ bool DataTypeVariant::haveMaximumSizeOfValue() const return std::all_of(variants.begin(), variants.end(), [](auto && elem) { return elem->haveMaximumSizeOfValue(); }); } -bool DataTypeVariant::hasDynamicSubcolumns() const +bool DataTypeVariant::hasDynamicSubcolumnsDeprecated() const { - return std::any_of(variants.begin(), variants.end(), [](auto && elem) { return elem->hasDynamicSubcolumns(); }); + return std::any_of(variants.begin(), variants.end(), [](auto && elem) { return elem->hasDynamicSubcolumnsDeprecated(); }); } -std::optional DataTypeVariant::tryGetVariantDiscriminator(const IDataType & type) const +std::optional DataTypeVariant::tryGetVariantDiscriminator(const String & type_name) const { - String type_name = type.getName(); for (size_t i = 0; i != variants.size(); ++i) { - /// We don't use equals here, because it doesn't respect custom type names. if (variants[i]->getName() == type_name) return i; } @@ -183,7 +186,7 @@ void DataTypeVariant::forEachChild(const DB::IDataType::ChildCallback & callback static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.empty()) - throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Variant cannot be empty"); + return std::make_shared(DataTypes{}); DataTypes nested_types; nested_types.reserve(arguments->children.size()); diff --git a/src/DataTypes/DataTypeVariant.h b/src/DataTypes/DataTypeVariant.h index ab471d37b2f..5ba1c3b40be 100644 --- a/src/DataTypes/DataTypeVariant.h +++ b/src/DataTypes/DataTypeVariant.h @@ -46,14 +46,14 @@ public: bool haveSubtypes() const override { return true; } bool textCanContainOnlyValidUTF8() const override; bool haveMaximumSizeOfValue() const override; - bool hasDynamicSubcolumns() const override; + bool hasDynamicSubcolumnsDeprecated() const override; size_t getMaximumSizeOfValueInMemory() const override; const DataTypePtr & getVariant(size_t i) const { return variants[i]; } const DataTypes & getVariants() const { return variants; } /// Check if Variant has provided type in the list of variants and return its discriminator. - std::optional tryGetVariantDiscriminator(const IDataType & type) const; + std::optional tryGetVariantDiscriminator(const String & type_name) const; void forEachChild(const ChildCallback & callback) const override; diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 344b81be960..1c9715bbf53 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -101,14 +101,12 @@ void IDataType::forEachSubcolumn( data.serialization->enumerateStreams(settings, callback_with_data, data); } -template -Ptr IDataType::getForSubcolumn( +std::unique_ptr IDataType::getSubcolumnData( std::string_view subcolumn_name, const SubstreamData & data, - Ptr SubstreamData::*member, - bool throw_if_null) const + bool throw_if_null) { - Ptr res; + std::unique_ptr res; ISerialization::StreamCallback callback_with_data = [&](const auto & subpath) { @@ -120,7 +118,29 @@ Ptr IDataType::getForSubcolumn( auto name = ISerialization::getSubcolumnNameForStream(subpath, prefix_len); /// Create data from path only if it's requested subcolumn. 
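Inside `getSubcolumnData`, a stream name either matches the requested subcolumn exactly or is a dot-separated prefix of it, in which case the remainder is handed to the prefix type's dynamic-subcolumn lookup. The sketch below isolates that string matching only; it deliberately omits the `hasDynamicSubcolumnsData()` gate and the `SubstreamData` plumbing, and the names in `main` are made-up examples.

```cpp
#include <iostream>
#include <optional>
#include <string>
#include <string_view>

// A stream name either matches the requested subcolumn exactly, or is a
// dot-separated prefix of it; in the latter case the remainder would be
// resolved by the prefix type's dynamic-subcolumn lookup.
struct Match
{
    bool exact = false;
    std::optional<std::string> dynamic_remainder;  // set only on a prefix match
};

Match matchSubcolumn(std::string_view requested, std::string_view stream_name)
{
    Match m;
    if (requested == stream_name)
        m.exact = true;
    else if (requested.size() > stream_name.size() + 1
             && requested.substr(0, stream_name.size()) == stream_name
             && requested[stream_name.size()] == '.')
        m.dynamic_remainder = std::string(requested.substr(stream_name.size() + 1));
    return m;
}

int main()
{
    Match by_prefix = matchSubcolumn("d.UInt64", "d");
    std::cout << "exact=" << by_prefix.exact
              << " remainder=" << by_prefix.dynamic_remainder.value_or("<none>") << "\n";  // exact=0 remainder=UInt64

    Match exact = matchSubcolumn("size0", "size0");
    std::cout << "exact=" << exact.exact
              << " remainder=" << exact.dynamic_remainder.value_or("<none>") << "\n";      // exact=1 remainder=<none>
}
```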
if (name == subcolumn_name) - res = ISerialization::createFromPath(subpath, prefix_len).*member; + { + res = std::make_unique(ISerialization::createFromPath(subpath, prefix_len)); + } + /// Check if this subcolumn is a prefix of requested subcolumn and it can create dynamic subcolumns. + else if (subcolumn_name.starts_with(name + ".") && subpath[i].data.type && subpath[i].data.type->hasDynamicSubcolumnsData()) + { + auto dynamic_subcolumn_name = subcolumn_name.substr(name.size() + 1); + auto dynamic_subcolumn_data = subpath[i].data.type->getDynamicSubcolumnData(dynamic_subcolumn_name, subpath[i].data, false); + if (dynamic_subcolumn_data) + { + /// Create requested subcolumn using dynamic subcolumn data. + auto tmp_subpath = subpath; + if (tmp_subpath[i].creator) + { + dynamic_subcolumn_data->type = tmp_subpath[i].creator->create(dynamic_subcolumn_data->type); + dynamic_subcolumn_data->column = tmp_subpath[i].creator->create(dynamic_subcolumn_data->column); + dynamic_subcolumn_data->serialization = tmp_subpath[i].creator->create(dynamic_subcolumn_data->serialization); + } + + tmp_subpath[i].data = *dynamic_subcolumn_data; + res = std::make_unique(ISerialization::createFromPath(tmp_subpath, prefix_len)); + } + } } subpath[i].visited = true; } @@ -130,8 +150,11 @@ Ptr IDataType::getForSubcolumn( settings.position_independent_encoding = false; data.serialization->enumerateStreams(settings, callback_with_data, data); + if (!res && data.type->hasDynamicSubcolumnsData()) + return data.type->getDynamicSubcolumnData(subcolumn_name, data, throw_if_null); + if (!res && throw_if_null) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, data.type->getName()); return res; } @@ -141,34 +164,51 @@ bool IDataType::hasSubcolumn(std::string_view subcolumn_name) const return tryGetSubcolumnType(subcolumn_name) != nullptr; } +bool IDataType::hasDynamicSubcolumns() const +{ + if (hasDynamicSubcolumnsData()) + return true; + + bool has_dynamic_subcolumns = false; + auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()); + auto callback = [&](const SubstreamPath &, const String &, const SubstreamData & subcolumn_data) + { + has_dynamic_subcolumns |= subcolumn_data.type->hasDynamicSubcolumnsData(); + }; + forEachSubcolumn(callback, data); + return has_dynamic_subcolumns; +} + DataTypePtr IDataType::tryGetSubcolumnType(std::string_view subcolumn_name) const { auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::type, false); + auto subcolumn_data = getSubcolumnData(subcolumn_name, data, false); + return subcolumn_data ? 
subcolumn_data->type : nullptr; } DataTypePtr IDataType::getSubcolumnType(std::string_view subcolumn_name) const { auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::type, true); + return getSubcolumnData(subcolumn_name, data, true)->type; } ColumnPtr IDataType::tryGetSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const { - auto data = SubstreamData(getDefaultSerialization()).withColumn(column); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::column, false); + auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()).withColumn(column); + auto subcolumn_data = getSubcolumnData(subcolumn_name, data, false); + return subcolumn_data ? subcolumn_data->column : nullptr; } ColumnPtr IDataType::getSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const { - auto data = SubstreamData(getDefaultSerialization()).withColumn(column); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::column, true); + auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()).withColumn(column); + return getSubcolumnData(subcolumn_name, data, true)->column; } SerializationPtr IDataType::getSubcolumnSerialization(std::string_view subcolumn_name, const SerializationPtr & serialization) const { - auto data = SubstreamData(serialization); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::serialization, true); + auto data = SubstreamData(serialization).withType(getPtr()); + return getSubcolumnData(subcolumn_name, data, true)->serialization; } Names IDataType::getSubcolumnNames() const @@ -323,6 +363,7 @@ bool isMap(TYPE data_type) {return WhichDataType(data_type).isMap(); } \ bool isInterval(TYPE data_type) {return WhichDataType(data_type).isInterval(); } \ bool isObject(TYPE data_type) { return WhichDataType(data_type).isObject(); } \ bool isVariant(TYPE data_type) { return WhichDataType(data_type).isVariant(); } \ +bool isDynamic(TYPE data_type) { return WhichDataType(data_type).isDynamic(); } \ bool isNothing(TYPE data_type) { return WhichDataType(data_type).isNothing(); } \ \ bool isColumnedAsNumber(TYPE data_type) \ diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index eaf798a3017..46c30240ef8 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -11,6 +11,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + + class ReadBuffer; class WriteBuffer; @@ -311,8 +317,13 @@ public: /// Strings, Numbers, Date, DateTime, Nullable virtual bool canBeInsideLowCardinality() const { return false; } - /// Object, Array(Object), Tuple(..., Object, ...) - virtual bool hasDynamicSubcolumns() const { return false; } + /// Checks for deprecated Object type usage recursively: Object, Array(Object), Tuple(..., Object, ...) + virtual bool hasDynamicSubcolumnsDeprecated() const { return false; } + + /// Checks if column has dynamic subcolumns. + virtual bool hasDynamicSubcolumns() const; + /// Checks if column can create dynamic subcolumns data and getDynamicSubcolumnData can be called. + virtual bool hasDynamicSubcolumnsData() const { return false; } /// Updates avg_value_size_hint for newly read column. Uses to optimize deserialization. Zero expected for first column. 
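The new `IDataType::hasDynamicSubcolumns` reports true if the type itself can produce dynamic subcolumns or if any nested subcolumn type can. The sketch below expresses the same idea with explicit recursion over a toy type tree instead of the real `forEachSubcolumn` enumeration:

```cpp
#include <iostream>
#include <memory>
#include <vector>

// A type "has dynamic subcolumns" if it can produce them itself or if any
// nested type can; the real code derives this by enumerating substreams.
struct TypeNode
{
    bool provides_dynamic_subcolumns = false;  // counterpart of hasDynamicSubcolumnsData()
    std::vector<std::shared_ptr<TypeNode>> children;

    bool hasDynamicSubcolumns() const
    {
        if (provides_dynamic_subcolumns)
            return true;
        for (const auto & child : children)
            if (child->hasDynamicSubcolumns())
                return true;
        return false;
    }
};

int main()
{
    auto plain = std::make_shared<TypeNode>();    // e.g. UInt64
    auto dynamic = std::make_shared<TypeNode>();  // e.g. Dynamic
    dynamic->provides_dynamic_subcolumns = true;

    auto array = std::make_shared<TypeNode>();    // e.g. Array(Dynamic)
    array->children.push_back(dynamic);

    TypeNode tuple;                               // e.g. Tuple(UInt64, Array(Dynamic))
    tuple.children = {plain, array};

    std::cout << std::boolalpha << tuple.hasDynamicSubcolumns() << "\n";  // true
}
```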
static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint); @@ -329,16 +340,25 @@ protected: mutable SerializationPtr custom_serialization; public: + bool hasCustomName() const { return static_cast(custom_name.get()); } const IDataTypeCustomName * getCustomName() const { return custom_name.get(); } const ISerialization * getCustomSerialization() const { return custom_serialization.get(); } -private: - template - Ptr getForSubcolumn( +protected: + static std::unique_ptr getSubcolumnData( std::string_view subcolumn_name, const SubstreamData & data, - Ptr SubstreamData::*member, - bool throw_if_null) const; + bool throw_if_null); + + virtual std::unique_ptr getDynamicSubcolumnData( + std::string_view /*subcolumn_name*/, + const SubstreamData & /*data*/, + bool throw_if_null) const + { + if (throw_if_null) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDynamicSubcolumnData() is not implemented for type {}", getName()); + return nullptr; + } }; @@ -423,6 +443,7 @@ struct WhichDataType constexpr bool isLowCardinality() const { return idx == TypeIndex::LowCardinality; } constexpr bool isVariant() const { return idx == TypeIndex::Variant; } + constexpr bool isDynamic() const { return idx == TypeIndex::Dynamic; } }; /// IDataType helpers (alternative for IDataType virtual methods with single point of truth) @@ -483,6 +504,7 @@ bool isMap(TYPE data_type); \ bool isInterval(TYPE data_type); \ bool isObject(TYPE data_type); \ bool isVariant(TYPE data_type); \ +bool isDynamic(TYPE data_type); \ bool isNothing(TYPE data_type); \ \ bool isColumnedAsNumber(TYPE data_type); \ diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index 7c671fcf44f..6993523bcb7 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -177,7 +177,7 @@ static std::pair convertObjectColumnToTuple( static std::pair recursivlyConvertDynamicColumnToTuple( const ColumnPtr & column, const DataTypePtr & type) { - if (!type->hasDynamicSubcolumns()) + if (!type->hasDynamicSubcolumnsDeprecated()) return {column, type}; if (const auto * type_object = typeid_cast(type.get())) @@ -243,7 +243,7 @@ void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & sto { for (auto & column : block) { - if (!column.type->hasDynamicSubcolumns()) + if (!column.type->hasDynamicSubcolumnsDeprecated()) continue; std::tie(column.column, column.type) @@ -417,7 +417,7 @@ static DataTypePtr getLeastCommonTypeForTuple( static DataTypePtr getLeastCommonTypeForDynamicColumnsImpl( const DataTypePtr & type_in_storage, const DataTypes & concrete_types, bool check_ambiguos_paths) { - if (!type_in_storage->hasDynamicSubcolumns()) + if (!type_in_storage->hasDynamicSubcolumnsDeprecated()) return type_in_storage; if (isObject(type_in_storage)) @@ -459,7 +459,7 @@ DataTypePtr getLeastCommonTypeForDynamicColumns( DataTypePtr createConcreteEmptyDynamicColumn(const DataTypePtr & type_in_storage) { - if (!type_in_storage->hasDynamicSubcolumns()) + if (!type_in_storage->hasDynamicSubcolumnsDeprecated()) return type_in_storage; if (isObject(type_in_storage)) @@ -494,7 +494,7 @@ bool hasDynamicSubcolumns(const ColumnsDescription & columns) return std::any_of(columns.begin(), columns.end(), [](const auto & column) { - return column.type->hasDynamicSubcolumns(); + return column.type->hasDynamicSubcolumnsDeprecated(); }); } @@ -1065,7 +1065,7 @@ Field FieldVisitorFoldDimension::operator()(const Null & x) const void setAllObjectsToDummyTupleType(NamesAndTypesList & columns) { for 
(auto & column : columns) - if (column.type->hasDynamicSubcolumns()) + if (column.type->hasDynamicSubcolumnsDeprecated()) column.type = createConcreteEmptyDynamicColumn(column.type); } diff --git a/src/DataTypes/ObjectUtils.h b/src/DataTypes/ObjectUtils.h index 3e3b1b96740..6599d8adef1 100644 --- a/src/DataTypes/ObjectUtils.h +++ b/src/DataTypes/ObjectUtils.h @@ -194,7 +194,7 @@ ColumnsDescription getConcreteObjectColumns( /// dummy column will be removed. for (const auto & column : storage_columns) { - if (column.type->hasDynamicSubcolumns()) + if (column.type->hasDynamicSubcolumnsDeprecated()) types_in_entries[column.name].push_back(createConcreteEmptyDynamicColumn(column.type)); } @@ -204,7 +204,7 @@ ColumnsDescription getConcreteObjectColumns( for (const auto & column : entry_columns) { auto storage_column = storage_columns.tryGetPhysical(column.name); - if (storage_column && storage_column->type->hasDynamicSubcolumns()) + if (storage_column && storage_column->type->hasDynamicSubcolumnsDeprecated()) types_in_entries[column.name].push_back(column.type); } } diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index a3a28f8091c..dbe27a5f3f6 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -196,6 +196,8 @@ String getNameForSubstreamPath( stream_name += ".variant_offsets"; else if (it->type == Substream::VariantElement) stream_name += "." + it->variant_element_name; + else if (it->type == SubstreamType::DynamicStructure) + stream_name += ".dynamic_structure"; } return stream_name; @@ -271,6 +273,23 @@ ColumnPtr ISerialization::getFromSubstreamsCache(SubstreamsCache * cache, const return it == cache->end() ? nullptr : it->second; } +void ISerialization::addToSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path, DeserializeBinaryBulkStatePtr state) +{ + if (!cache || path.empty()) + return; + + cache->emplace(getSubcolumnNameForStream(path), state); +} + +ISerialization::DeserializeBinaryBulkStatePtr ISerialization::getFromSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path) +{ + if (!cache || path.empty()) + return nullptr; + + auto it = cache->find(getSubcolumnNameForStream(path)); + return it == cache->end() ? 
nullptr : it->second; +} + bool ISerialization::isSpecialCompressionAllowed(const SubstreamPath & path) { for (const auto & elem : path) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index ebaa26d19a6..914ff9cf4a2 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -99,6 +99,19 @@ public: using SubcolumnCreatorPtr = std::shared_ptr; + struct SerializeBinaryBulkState + { + virtual ~SerializeBinaryBulkState() = default; + }; + + struct DeserializeBinaryBulkState + { + virtual ~DeserializeBinaryBulkState() = default; + }; + + using SerializeBinaryBulkStatePtr = std::shared_ptr; + using DeserializeBinaryBulkStatePtr = std::shared_ptr; + struct SubstreamData { SubstreamData() = default; @@ -125,10 +138,22 @@ public: return *this; } + SubstreamData & withDeserializeState(DeserializeBinaryBulkStatePtr deserialize_state_) + { + deserialize_state = std::move(deserialize_state_); + return *this; + } + SerializationPtr serialization; DataTypePtr type; ColumnPtr column; SerializationInfoPtr serialization_info; + + /// For types with dynamic subcolumns deserialize state contains information + /// about current dynamic structure. And this information can be useful + /// when we call enumerateStreams after deserializeBinaryBulkStatePrefix + /// to enumerate dynamic streams. + DeserializeBinaryBulkStatePtr deserialize_state; }; struct Substream @@ -160,6 +185,9 @@ public: VariantElements, VariantElement, + DynamicData, + DynamicStructure, + Regular, }; @@ -218,19 +246,6 @@ public: using OutputStreamGetter = std::function; using InputStreamGetter = std::function; - struct SerializeBinaryBulkState - { - virtual ~SerializeBinaryBulkState() = default; - }; - - struct DeserializeBinaryBulkState - { - virtual ~DeserializeBinaryBulkState() = default; - }; - - using SerializeBinaryBulkStatePtr = std::shared_ptr; - using DeserializeBinaryBulkStatePtr = std::shared_ptr; - struct SerializeBinaryBulkSettings { OutputStreamGetter getter; @@ -240,6 +255,14 @@ public: bool low_cardinality_use_single_dictionary_for_part = true; bool position_independent_encoding = true; + + enum class DynamicStatisticsMode + { + NONE, /// Don't write statistics. + PREFIX, /// Write statistics in prefix. + SUFFIX, /// Write statistics in suffix. + }; + DynamicStatisticsMode dynamic_write_statistics = DynamicStatisticsMode::NONE; }; struct DeserializeBinaryBulkSettings @@ -256,6 +279,8 @@ public: /// If not zero, may be used to avoid reallocations while reading column of String type. double avg_value_size_hint = 0; + + bool dynamic_read_statistics = false; }; /// Call before serializeBinaryBulkWithMultipleStreams chain to write something before first mark. @@ -270,10 +295,13 @@ public: SerializeBinaryBulkSettings & /*settings*/, SerializeBinaryBulkStatePtr & /*state*/) const {} + using SubstreamsDeserializeStatesCache = std::unordered_map; + /// Call before before deserializeBinaryBulkWithMultipleStreams chain to get DeserializeBinaryBulkStatePtr. virtual void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & /*settings*/, - DeserializeBinaryBulkStatePtr & /*state*/) const {} + DeserializeBinaryBulkStatePtr & /*state*/, + SubstreamsDeserializeStatesCache * /*cache*/) const {} /** 'offset' and 'limit' are used to specify range. * limit = 0 - means no limit. 
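The new `SubstreamsDeserializeStatesCache` lets a column and its subcolumns share one deserialize state per stream, mirroring the existing substreams cache for columns. A toy version of that lookup-or-create flow; it is keyed by a plain string rather than the name derived from the substream path, and the stream name in `main` is a made-up example.

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

// Deserialize states are cached per stream so that a column and its
// subcolumns reuse one state instead of re-reading the prefix.
struct DeserializeState { int prefix_reads = 0; };
using DeserializeStatePtr = std::shared_ptr<DeserializeState>;
using StatesCache = std::unordered_map<std::string, DeserializeStatePtr>;

DeserializeStatePtr deserializePrefix(const std::string & stream_name, StatesCache * cache)
{
    if (cache)
        if (auto it = cache->find(stream_name); it != cache->end())
            return it->second;                   // reuse the state created earlier

    auto state = std::make_shared<DeserializeState>();
    state->prefix_reads = 1;                     // stands in for actually reading the prefix
    if (cache)
        cache->emplace(stream_name, state);
    return state;
}

int main()
{
    StatesCache cache;
    DeserializeStatePtr first = deserializePrefix("d.dynamic_structure", &cache);
    DeserializeStatePtr second = deserializePrefix("d.dynamic_structure", &cache);
    std::cout << (first == second ? "state shared" : "state duplicated") << "\n";  // state shared
}
```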
@@ -393,6 +421,9 @@ public: static void addToSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path, ColumnPtr column); static ColumnPtr getFromSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path); + static void addToSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path, DeserializeBinaryBulkStatePtr state); + static DeserializeBinaryBulkStatePtr getFromSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path); + static bool isSpecialCompressionAllowed(const SubstreamPath & path); static size_t getArrayLevel(const SubstreamPath & path); diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index bab7c1d4cf2..55f7641e058 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -146,10 +146,10 @@ void SerializationAggregateFunction::serializeTextEscaped(const IColumn & column } -void SerializationAggregateFunction::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationAggregateFunction::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String s; - readEscapedString(s, istr); + settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(s, istr) : readEscapedString(s, istr); deserializeFromString(function, column, s, version); } diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index e8aab615849..ac7b8f4d084 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -254,7 +254,8 @@ void SerializationArray::enumerateStreams( auto next_data = SubstreamData(nested) .withType(type_array ? type_array->getNestedType() : nullptr) .withColumn(column_array ? 
column_array->getDataPtr() : nullptr) - .withSerializationInfo(data.serialization_info); + .withSerializationInfo(data.serialization_info) + .withDeserializeState(data.deserialize_state); nested->enumerateStreams(settings, callback, next_data); settings.path.pop_back(); @@ -284,10 +285,11 @@ void SerializationArray::serializeBinaryBulkStateSuffix( void SerializationArray::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { settings.path.push_back(Substream::ArrayElements); - nested->deserializeBinaryBulkStatePrefix(settings, state); + nested->deserializeBinaryBulkStatePrefix(settings, state, cache); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationArray.h b/src/DataTypes/Serializations/SerializationArray.h index 82f5e8bce45..c3353f0c251 100644 --- a/src/DataTypes/Serializations/SerializationArray.h +++ b/src/DataTypes/Serializations/SerializationArray.h @@ -55,7 +55,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, @@ -71,7 +72,6 @@ public: DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const override; -private: struct SubcolumnCreator : public ISubcolumnCreator { const ColumnPtr offsets; diff --git a/src/DataTypes/Serializations/SerializationBool.cpp b/src/DataTypes/Serializations/SerializationBool.cpp index b63f25ddc35..a71c8a91cef 100644 --- a/src/DataTypes/Serializations/SerializationBool.cpp +++ b/src/DataTypes/Serializations/SerializationBool.cpp @@ -242,8 +242,10 @@ void SerializationBool::deserializeTextEscaped(IColumn & column, ReadBuffer & is { if (istr.eof()) throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF."); - - deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; }); + if (settings.tsv.crlf_end_of_line_input) + deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n' || *buf.position() == '\r'; }); + else + deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; }); } bool SerializationBool::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp b/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp index 938fd050173..1ba16f8492e 100644 --- a/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp +++ b/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp @@ -75,7 +75,7 @@ void SerializationCustomSimpleText::serializeTextEscaped(const IColumn & column, void SerializationCustomSimpleText::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String str; - readEscapedString(str, istr); + settings.tsv.crlf_end_of_line_input ? 
readEscapedStringCRLF(str, istr) : readEscapedString(str, istr); deserializeFromString(*this, column, str, settings); } diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp new file mode 100644 index 00000000000..6351ff0ca0b --- /dev/null +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -0,0 +1,644 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; +} + +struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryBulkState +{ + SerializationDynamic::DynamicStructureSerializationVersion structure_version; + DataTypePtr variant_type; + Names variant_names; + SerializationPtr variant_serialization; + ISerialization::SerializeBinaryBulkStatePtr variant_state; + + /// Variants statistics. Map (Variant name) -> (Variant size). + ColumnDynamic::Statistics statistics = { .source = ColumnDynamic::Statistics::Source::READ, .data = {} }; + + explicit SerializeBinaryBulkStateDynamic(UInt64 structure_version_) : structure_version(structure_version_) {} +}; + +struct DeserializeBinaryBulkStateDynamic : public ISerialization::DeserializeBinaryBulkState +{ + SerializationPtr variant_serialization; + ISerialization::DeserializeBinaryBulkStatePtr variant_state; + ISerialization::DeserializeBinaryBulkStatePtr structure_state; +}; + +void SerializationDynamic::enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const +{ + settings.path.push_back(Substream::DynamicStructure); + callback(settings.path); + settings.path.pop_back(); + + const auto * column_dynamic = data.column ? &assert_cast(*data.column) : nullptr; + const auto * deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; + + /// If column is nullptr and we don't have deserialize state yet, nothing to enumerate as we don't have any variants. + if (!column_dynamic && !deserialize_state) + return; + + const auto & variant_type = column_dynamic ? column_dynamic->getVariantInfo().variant_type : checkAndGetState(deserialize_state->structure_state)->variant_type; + auto variant_serialization = variant_type->getDefaultSerialization(); + + settings.path.push_back(Substream::DynamicData); + auto variant_data = SubstreamData(variant_serialization) + .withType(variant_type) + .withColumn(column_dynamic ? column_dynamic->getVariantColumnPtr() : nullptr) + .withSerializationInfo(data.serialization_info) + .withDeserializeState(deserialize_state ? 
deserialize_state->variant_state : nullptr); + settings.path.back().data = variant_data; + variant_serialization->enumerateStreams(settings, callback, variant_data); + settings.path.pop_back(); +} + +SerializationDynamic::DynamicStructureSerializationVersion::DynamicStructureSerializationVersion(UInt64 version) : value(static_cast(version)) +{ + checkVersion(version); +} + +void SerializationDynamic::DynamicStructureSerializationVersion::checkVersion(UInt64 version) +{ + if (version != VariantTypeName) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for Dynamic structure serialization."); +} + +void SerializationDynamic::serializeBinaryBulkStatePrefix( + const DB::IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const +{ + const auto & column_dynamic = assert_cast(column); + const auto & variant_info = column_dynamic.getVariantInfo(); + + settings.path.push_back(Substream::DynamicStructure); + auto * stream = settings.getter(settings.path); + settings.path.pop_back(); + + if (!stream) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for Dynamic column structure during serialization of binary bulk state prefix"); + + /// Write structure serialization version. + UInt64 structure_version = DynamicStructureSerializationVersion::Value::VariantTypeName; + writeBinaryLittleEndian(structure_version, *stream); + auto dynamic_state = std::make_shared(structure_version); + + dynamic_state->variant_type = variant_info.variant_type; + dynamic_state->variant_names = variant_info.variant_names; + const auto & variant_column = column_dynamic.getVariantColumn(); + + /// Write internal Variant type name. + writeStringBinary(dynamic_state->variant_type->getName(), *stream); + + /// Write statistics in prefix if needed. + if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX) + { + const auto & statistics = column_dynamic.getStatistics(); + for (size_t i = 0; i != variant_info.variant_names.size(); ++i) + { + size_t size = 0; + /// Check if we can use statistics stored in the column. There are two possible sources + /// of these statistics: + /// - statistics calculated during a merge of data parts (Statistics::Source::MERGE); + /// - statistics read from a data part during deserialization of the Dynamic column (Statistics::Source::READ). + /// We can rely only on statistics calculated during the merge, because a column whose statistics were read + /// during deserialization from some data part could have been filtered/limited/transformed/etc since then, so those statistics can be outdated. + if (!statistics.data.empty() && statistics.source == ColumnDynamic::Statistics::Source::MERGE) + size = statistics.data.at(variant_info.variant_names[i]); + /// Otherwise we can use only variant sizes from current column.
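+ /// These sizes are taken from the in-memory column itself and are always available, even when no merge statistics exist.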
+ else + size = variant_column.getVariantByGlobalDiscriminator(i).size(); + writeVarUInt(size, *stream); + } + } + + dynamic_state->variant_serialization = dynamic_state->variant_type->getDefaultSerialization(); + settings.path.push_back(Substream::DynamicData); + dynamic_state->variant_serialization->serializeBinaryBulkStatePrefix(variant_column, settings, dynamic_state->variant_state); + settings.path.pop_back(); + + state = std::move(dynamic_state); +} + +void SerializationDynamic::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const +{ + DeserializeBinaryBulkStatePtr structure_state = deserializeDynamicStructureStatePrefix(settings, cache); + if (!structure_state) + return; + + auto dynamic_state = std::make_shared(); + dynamic_state->structure_state = structure_state; + dynamic_state->variant_serialization = checkAndGetState(structure_state)->variant_type->getDefaultSerialization(); + + settings.path.push_back(Substream::DynamicData); + dynamic_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_state->variant_state, cache); + settings.path.pop_back(); + + state = std::move(dynamic_state); +} + +ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeDynamicStructureStatePrefix( + DeserializeBinaryBulkSettings & settings, SubstreamsDeserializeStatesCache * cache) +{ + settings.path.push_back(Substream::DynamicStructure); + + DeserializeBinaryBulkStatePtr state = nullptr; + if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path)) + { + state = cached_state; + } + else if (auto * structure_stream = settings.getter(settings.path)) + { + /// Read structure serialization version. + UInt64 structure_version; + readBinaryLittleEndian(structure_version, *structure_stream); + auto structure_state = std::make_shared(structure_version); + /// Read internal Variant type name. + String data_type_name; + readStringBinary(data_type_name, *structure_stream); + structure_state->variant_type = DataTypeFactory::instance().get(data_type_name); + const auto * variant_type = typeid_cast(structure_state->variant_type.get()); + if (!variant_type) + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type of Dynamic nested column, expected Variant, got {}", structure_state->variant_type->getName()); + + /// Read statistics. + if (settings.dynamic_read_statistics) + { + const auto & variants = variant_type->getVariants(); + size_t variant_size; + for (const auto & variant : variants) + { + readVarUInt(variant_size, *structure_stream); + structure_state->statistics.data[variant->getName()] = variant_size; + } + } + + state = structure_state; + addToSubstreamsDeserializeStatesCache(cache, settings.path, state); + } + + settings.path.pop_back(); + return state; +} + +void SerializationDynamic::serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const +{ + auto * dynamic_state = checkAndGetState(state); + settings.path.push_back(Substream::DynamicStructure); + auto * stream = settings.getter(settings.path); + settings.path.pop_back(); + + if (!stream) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for Dynamic column structure during serialization of binary bulk state suffix"); + + /// Write statistics in suffix if needed.
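+ /// Unlike the prefix case, these sizes were accumulated while the variant data was written, so they reflect exactly what serializeBinaryBulkWithMultipleStreams serialized.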
+ if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::SUFFIX) + { + for (const auto & variant_name : dynamic_state->variant_names) + writeVarUInt(dynamic_state->statistics.data[variant_name], *stream); + } + + settings.path.push_back(Substream::DynamicData); + dynamic_state->variant_serialization->serializeBinaryBulkStateSuffix(settings, dynamic_state->variant_state); + settings.path.pop_back(); +} + +void SerializationDynamic::serializeBinaryBulkWithMultipleStreams( + const DB::IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const +{ + const auto & column_dynamic = assert_cast(column); + auto * dynamic_state = checkAndGetState(state); + const auto & variant_info = column_dynamic.getVariantInfo(); + const auto * variant_column = &column_dynamic.getVariantColumn(); + + if (!variant_info.variant_type->equals(*dynamic_state->variant_type)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. Expected: {}, Got: {}", dynamic_state->variant_type->getName(), variant_info.variant_type->getName()); + + settings.path.push_back(Substream::DynamicData); + assert_cast(*dynamic_state->variant_serialization) + .serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(*variant_column, offset, limit, settings, dynamic_state->variant_state, dynamic_state->statistics.data); + settings.path.pop_back(); +} + +void SerializationDynamic::deserializeBinaryBulkWithMultipleStreams( + DB::ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const +{ + if (!state) + return; + + auto mutable_column = column->assumeMutable(); + auto * dynamic_state = checkAndGetState(state); + auto * structure_state = checkAndGetState(dynamic_state->structure_state); + + if (mutable_column->empty()) + mutable_column = ColumnDynamic::create(structure_state->variant_type->createColumn(), structure_state->variant_type, max_dynamic_types, structure_state->statistics); + + auto & column_dynamic = assert_cast(*mutable_column); + const auto & variant_info = column_dynamic.getVariantInfo(); + if (!variant_info.variant_type->equals(*structure_state->variant_type)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. 
Expected: {}, Got: {}", structure_state->variant_type->getName(), variant_info.variant_type->getName()); + + settings.path.push_back(Substream::DynamicData); + dynamic_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(column_dynamic.getVariantColumnPtr(), limit, settings, dynamic_state->variant_state, cache); + settings.path.pop_back(); + + column = std::move(mutable_column); +} + +void SerializationDynamic::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const +{ + UInt8 null_bit = field.isNull(); + writeBinary(null_bit, ostr); + if (null_bit) + return; + + auto field_type = applyVisitor(FieldToDataType(), field); + auto field_type_name = field_type->getName(); + writeVarUInt(field_type_name.size(), ostr); + writeString(field_type_name, ostr); + field_type->getDefaultSerialization()->serializeBinary(field, ostr, settings); +} + +void SerializationDynamic::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const +{ + UInt8 null_bit; + readBinary(null_bit, istr); + if (null_bit) + { + field = Null(); + return; + } + + size_t field_type_name_size; + readVarUInt(field_type_name_size, istr); + String field_type_name(field_type_name_size, 0); + istr.readStrict(field_type_name.data(), field_type_name_size); + auto field_type = DataTypeFactory::instance().get(field_type_name); + field_type->getDefaultSerialization()->deserializeBinary(field, istr, settings); +} + +void SerializationDynamic::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + const auto & variant_info = dynamic_column.getVariantInfo(); + const auto & variant_column = dynamic_column.getVariantColumn(); + auto global_discr = variant_column.globalDiscriminatorAt(row_num); + + UInt8 null_bit = global_discr == ColumnVariant::NULL_DISCRIMINATOR; + writeBinary(null_bit, ostr); + if (null_bit) + return; + + const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(global_discr); + const auto & variant_type_name = variant_info.variant_names[global_discr]; + writeVarUInt(variant_type_name.size(), ostr); + writeString(variant_type_name, ostr); + variant_type->getDefaultSerialization()->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings); +} + +template +static void deserializeVariant( + ColumnVariant & variant_column, + const DataTypePtr & variant_type, + ColumnVariant::Discriminator global_discr, + ReadBuffer & istr, + DeserializeFunc deserialize) +{ + auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discr); + deserialize(*variant_type->getDefaultSerialization(), variant, istr); + variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(global_discr)); + variant_column.getOffsets().push_back(variant.size() - 1); +} + +void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto & dynamic_column = assert_cast(column); + UInt8 null_bit; + readBinary(null_bit, istr); + if (null_bit) + { + dynamic_column.insertDefault(); + return; + } + + size_t variant_type_name_size; + readVarUInt(variant_type_name_size, istr); + String variant_type_name(variant_type_name_size, 0); + istr.readStrict(variant_type_name.data(), variant_type_name_size); + + const auto & variant_info = dynamic_column.getVariantInfo(); + auto it = 
variant_info.variant_name_to_discriminator.find(variant_type_name); + if (it != variant_info.variant_name_to_discriminator.end()) + { + const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(it->second); + deserializeVariant(dynamic_column.getVariantColumn(), variant_type, it->second, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); + return; + } + + /// We don't have this variant yet. Let's try to add it. + auto variant_type = DataTypeFactory::instance().get(variant_type_name); + if (dynamic_column.addNewVariant(variant_type)) + { + auto discr = variant_info.variant_name_to_discriminator.at(variant_type_name); + deserializeVariant(dynamic_column.getVariantColumn(), variant_type, discr, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); + return; + } + + /// We reached maximum number of variants and couldn't add new variant. + /// This case should be really rare in real use cases. + /// We should always be able to add String variant and insert value as String. + dynamic_column.addStringVariant(); + auto tmp_variant_column = variant_type->createColumn(); + variant_type->getDefaultSerialization()->deserializeBinary(*tmp_variant_column, istr, settings); + auto string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); + auto & variant_column = dynamic_column.getVariantColumn(); + variant_column.insertIntoVariantFrom(variant_info.variant_name_to_discriminator.at("String"), *string_column, 0); +} + +void SerializationDynamic::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextCSV(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +template +static void deserializeTextImpl( + IColumn & column, + ReadBuffer & istr, + const FormatSettings & settings, + ReadFieldFunc read_field, + FormatSettings::EscapingRule escaping_rule, + TryDeserializeVariantFunc try_deserialize_variant, + DeserializeVariant deserialize_variant) +{ + auto & dynamic_column = assert_cast(column); + auto & variant_column = dynamic_column.getVariantColumn(); + const auto & variant_info = dynamic_column.getVariantInfo(); + String field = read_field(istr); + auto field_buf = std::make_unique(field); + JSONInferenceInfo json_info; + auto variant_type = tryInferDataTypeByEscapingRule(field, settings, escaping_rule, &json_info); + if (escaping_rule == FormatSettings::EscapingRule::JSON) + transformFinalInferredJSONTypeIfNeeded(variant_type, settings, &json_info); + + if (checkIfTypeIsComplete(variant_type) && dynamic_column.addNewVariant(variant_type)) + { + auto discr = variant_info.variant_name_to_discriminator.at(variant_type->getName()); + deserializeVariant(dynamic_column.getVariantColumn(), variant_type, discr, *field_buf, deserialize_variant); + return; + } + + /// We couldn't infer type or add new variant. Try to insert field into current variants. + field_buf = std::make_unique(field); + if (try_deserialize_variant(*variant_info.variant_type->getDefaultSerialization(), variant_column, *field_buf)) + return; + + /// We couldn't insert field into any existing variant, add String variant and read value as String. 
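+ /// For the Quoted escaping rule the field is additionally wrapped in quotes below (if it is not already quoted) so that it can be parsed as a String literal.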
+ dynamic_column.addStringVariant(); + + if (escaping_rule == FormatSettings::EscapingRule::Quoted && (field.size() < 2 || field.front() != '\'' || field.back() != '\'')) + field = "'" + field + "'"; + + field_buf = std::make_unique(field); + auto string_discr = variant_info.variant_name_to_discriminator.at("String"); + deserializeVariant(dynamic_column.getVariantColumn(), std::make_shared(), string_discr, *field_buf, deserialize_variant); +} + +void SerializationDynamic::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [&settings](ReadBuffer & buf) + { + String field; + readCSVField(field, buf, settings.csv); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeTextCSV(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextCSV(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::CSV, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextCSV(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextCSV(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextEscaped(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [](ReadBuffer & buf) + { + String field; + readEscapedString(field, buf); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeTextEscaped(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextEscaped(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::Escaped, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextEscaped(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextEscaped(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextQuoted(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [](ReadBuffer & buf) + { + String field; + readQuotedField(field, buf); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return 
serialization.tryDeserializeTextQuoted(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextQuoted(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::Quoted, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextQuoted(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextQuoted(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextJSON(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [&settings](ReadBuffer & buf) + { + String field; + readJSONField(field, buf, settings.json); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeTextJSON(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextJSON(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::JSON, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextJSON(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextRaw(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [](ReadBuffer & buf) + { + String field; + readString(field, buf); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeTextRaw(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextRaw(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::Raw, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextRaw(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextRaw(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + 
dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeText(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [](ReadBuffer & buf) + { + String field; + readStringUntilEOF(field, buf); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeWholeText(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeWholeText(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::Raw, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeWholeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeWholeText(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextXML(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +} diff --git a/src/DataTypes/Serializations/SerializationDynamic.h b/src/DataTypes/Serializations/SerializationDynamic.h new file mode 100644 index 00000000000..001a3cf87ce --- /dev/null +++ b/src/DataTypes/Serializations/SerializationDynamic.h @@ -0,0 +1,116 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class SerializationDynamicElement; + +class SerializationDynamic : public ISerialization +{ +public: + explicit SerializationDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) + { + } + + struct DynamicStructureSerializationVersion + { + enum Value + { + VariantTypeName = 1, + }; + + Value value; + + static void checkVersion(UInt64 version); + + explicit DynamicStructureSerializationVersion(UInt64 version); + }; + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + static DeserializeBinaryBulkStatePtr deserializeDynamicStructureStatePrefix( + DeserializeBinaryBulkSettings & settings, + SubstreamsDeserializeStatesCache * cache); + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; + + void serializeBinary(const Field & field, WriteBuffer & ostr, const 
FormatSettings & settings) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + +private: + friend SerializationDynamicElement; + + struct DeserializeBinaryBulkStateDynamicStructure : public ISerialization::DeserializeBinaryBulkState + { + DynamicStructureSerializationVersion structure_version; + DataTypePtr variant_type; + ColumnDynamic::Statistics statistics = {.source = ColumnDynamic::Statistics::Source::READ, .data = {}}; + + explicit DeserializeBinaryBulkStateDynamicStructure(UInt64 structure_version_) : structure_version(structure_version_) {} + }; + + size_t max_dynamic_types; +}; + +} diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp new file mode 100644 index 00000000000..dafd6d663b0 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -0,0 +1,119 @@ +#include 
+#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + + +struct DeserializeBinaryBulkStateDynamicElement : public ISerialization::DeserializeBinaryBulkState +{ + ISerialization::DeserializeBinaryBulkStatePtr structure_state; + SerializationPtr variant_serialization; + ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; +}; + +void SerializationDynamicElement::enumerateStreams( + DB::ISerialization::EnumerateStreamsSettings & settings, + const DB::ISerialization::StreamCallback & callback, + const DB::ISerialization::SubstreamData & data) const +{ + settings.path.push_back(Substream::DynamicStructure); + callback(settings.path); + settings.path.pop_back(); + + /// If we didn't deserialize prefix yet, we don't know if we actually have this variant in Dynamic column, + /// so we cannot enumerate variant streams. + if (!data.deserialize_state) + return; + + auto * deserialize_state = checkAndGetState(data.deserialize_state); + /// If we don't have this variant, no need to enumerate streams for it as we won't read from any stream. + if (!deserialize_state->variant_serialization) + return; + + settings.path.push_back(Substream::DynamicData); + auto variant_data = SubstreamData(deserialize_state->variant_serialization) + .withType(data.type) + .withColumn(data.column) + .withSerializationInfo(data.serialization_info) + .withDeserializeState(deserialize_state->variant_element_state); + deserialize_state->variant_serialization->enumerateStreams(settings, callback, variant_data); + settings.path.pop_back(); +} + +void SerializationDynamicElement::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationDynamicElement"); +} + +void SerializationDynamicElement::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationDynamicElement"); +} + +void SerializationDynamicElement::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const +{ + DeserializeBinaryBulkStatePtr structure_state = SerializationDynamic::deserializeDynamicStructureStatePrefix(settings, cache); + if (!structure_state) + return; + + auto dynamic_element_state = std::make_shared(); + dynamic_element_state->structure_state = std::move(structure_state); + const auto & variant_type = checkAndGetState(dynamic_element_state->structure_state)->variant_type; + /// Check if we actually have required element in the Variant. 
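+ /// If the requested variant is absent, variant_serialization stays nullptr and deserializeBinaryBulkWithMultipleStreams will fill the result with default values.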
+ if (auto global_discr = assert_cast(*variant_type).tryGetVariantDiscriminator(dynamic_element_name)) + { + settings.path.push_back(Substream::DynamicData); + dynamic_element_state->variant_serialization = std::make_shared(nested_serialization, dynamic_element_name, *global_discr); + dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache); + settings.path.pop_back(); + } + + state = std::move(dynamic_element_state); +} + +void SerializationDynamicElement::serializeBinaryBulkWithMultipleStreams(const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationDynamicElement"); +} + +void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & result_column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const +{ + if (!state) + return; + + auto * dynamic_element_state = checkAndGetState(state); + + if (dynamic_element_state->variant_serialization) + { + settings.path.push_back(Substream::DynamicData); + dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_element_state->variant_element_state, cache); + settings.path.pop_back(); + } + else + { + auto mutable_column = result_column->assumeMutable(); + mutable_column->insertManyDefaults(limit); + result_column = std::move(mutable_column); + } +} + +} diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.h b/src/DataTypes/Serializations/SerializationDynamicElement.h new file mode 100644 index 00000000000..2ddc3324139 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationDynamicElement.h @@ -0,0 +1,58 @@ +#pragma once + +#include + +namespace DB +{ + + +/// Serialization for Dynamic element when we read it as a subcolumn. +class SerializationDynamicElement final : public SerializationWrapper +{ +private: + /// To be able to deserialize Dynamic element as a subcolumn + /// we need its type name and global discriminator. 
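+ /// Only the type name is stored here; the global discriminator is resolved from the structure prefix in deserializeBinaryBulkStatePrefix.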
+ String dynamic_element_name; + +public: + SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_) + : SerializationWrapper(nested_) + , dynamic_element_name(dynamic_element_name_) + { + } + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; +}; + +} diff --git a/src/DataTypes/Serializations/SerializationEnum.cpp b/src/DataTypes/Serializations/SerializationEnum.cpp index d72442eec99..6d36c6a9a96 100644 --- a/src/DataTypes/Serializations/SerializationEnum.cpp +++ b/src/DataTypes/Serializations/SerializationEnum.cpp @@ -29,7 +29,7 @@ void SerializationEnum::deserializeTextEscaped(IColumn & column, ReadBuffe { /// NOTE It would be nice to do without creating a temporary object - at least extract std::string out. std::string field_name; - readEscapedString(field_name, istr); + settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(field_name, istr) : readEscapedString(field_name, istr); assert_cast(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name), true)); } } diff --git a/src/DataTypes/Serializations/SerializationFixedString.cpp b/src/DataTypes/Serializations/SerializationFixedString.cpp index 481ae2a6165..f919dc16d33 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.cpp +++ b/src/DataTypes/Serializations/SerializationFixedString.cpp @@ -10,8 +10,10 @@ #include #include +#include "Common/PODArray.h" #include #include +#include "base/types.h" namespace DB { @@ -183,14 +185,17 @@ static inline bool tryRead(const SerializationFixedString & self, IColumn & colu } -void SerializationFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - read(*this, column, [&istr](ColumnFixedString::Chars & data) { readEscapedStringInto(data, istr); }); + read(*this, column, [&istr, &settings](ColumnFixedString::Chars & data) + { + settings.tsv.crlf_end_of_line_input ? 
readEscapedStringInto(data, istr) : readEscapedStringInto(data, istr); + }); } bool SerializationFixedString::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { - return tryRead(*this, column, [&istr](ColumnFixedString::Chars & data) { readEscapedStringInto(data, istr); return true; }); + return tryRead(*this, column, [&istr](ColumnFixedString::Chars & data) { readEscapedStringInto,false>(data, istr); return true; }); } diff --git a/src/DataTypes/Serializations/SerializationInterval.cpp b/src/DataTypes/Serializations/SerializationInterval.cpp index c4ef34b4325..ef96ad4729f 100644 --- a/src/DataTypes/Serializations/SerializationInterval.cpp +++ b/src/DataTypes/Serializations/SerializationInterval.cpp @@ -68,9 +68,9 @@ void SerializationInterval::deserializeBinaryBulk(IColumn & column, ReadBuffer & } void SerializationInterval::deserializeBinaryBulkStatePrefix( - DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { - dispatch(&ISerialization::deserializeBinaryBulkStatePrefix, FormatSettings::IntervalOutputFormat::Numeric, settings, state); + dispatch(&ISerialization::deserializeBinaryBulkStatePrefix, FormatSettings::IntervalOutputFormat::Numeric, settings, state, cache); } diff --git a/src/DataTypes/Serializations/SerializationInterval.h b/src/DataTypes/Serializations/SerializationInterval.h index a4e6c204e4f..368aff4f0c3 100644 --- a/src/DataTypes/Serializations/SerializationInterval.h +++ b/src/DataTypes/Serializations/SerializationInterval.h @@ -34,7 +34,10 @@ public: void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; - void deserializeBinaryBulkStatePrefix(DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const override; + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, size_t limit, diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index 2d2be195098..40071c4607a 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -267,7 +267,8 @@ void SerializationLowCardinality::serializeBinaryBulkStateSuffix( void SerializationLowCardinality::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * /*cache*/) const { settings.path.push_back(Substream::DictionaryKeys); auto * stream = settings.getter(settings.path); @@ -515,8 +516,14 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( size_t limit, DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, - SubstreamsCache * /* cache */) const + SubstreamsCache * cache) const { + if (auto cached_column = getFromSubstreamsCache(cache, settings.path)) + { + column = 
cached_column; + return; + } + auto mutable_column = column->assumeMutable(); ColumnLowCardinality & low_cardinality_column = typeid_cast(*mutable_column); @@ -670,6 +677,7 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( } column = std::move(mutable_column); + addToSubstreamsCache(cache, settings.path, column); } void SerializationLowCardinality::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.h b/src/DataTypes/Serializations/SerializationLowCardinality.h index d2c3a95c702..aa64e956a64 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.h +++ b/src/DataTypes/Serializations/SerializationLowCardinality.h @@ -33,7 +33,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 49bc89687f1..70fe5182ade 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -398,7 +398,8 @@ void SerializationMap::enumerateStreams( auto next_data = SubstreamData(nested) .withType(data.type ? assert_cast(*data.type).getNestedType() : nullptr) .withColumn(data.column ? assert_cast(*data.column).getNestedColumnPtr() : nullptr) - .withSerializationInfo(data.serialization_info); + .withSerializationInfo(data.serialization_info) + .withDeserializeState(data.deserialize_state); nested->enumerateStreams(settings, callback, next_data); } @@ -420,9 +421,10 @@ void SerializationMap::serializeBinaryBulkStateSuffix( void SerializationMap::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { - nested->deserializeBinaryBulkStatePrefix(settings, state); + nested->deserializeBinaryBulkStatePrefix(settings, state, cache); } diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index 3e27ef1b04a..cfcde445c1f 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -51,7 +51,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationNamed.cpp b/src/DataTypes/Serializations/SerializationNamed.cpp index 2792827e690..07f5f9ea7ed 100644 --- a/src/DataTypes/Serializations/SerializationNamed.cpp +++ b/src/DataTypes/Serializations/SerializationNamed.cpp @@ -54,10 +54,11 @@ void SerializationNamed::serializeBinaryBulkStateSuffix( void SerializationNamed::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { addToPath(settings.path); - 
nested_serialization->deserializeBinaryBulkStatePrefix(settings, state); + nested_serialization->deserializeBinaryBulkStatePrefix(settings, state, cache); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationNamed.h b/src/DataTypes/Serializations/SerializationNamed.h index 0633ba2ea6f..bb2161e40e6 100644 --- a/src/DataTypes/Serializations/SerializationNamed.h +++ b/src/DataTypes/Serializations/SerializationNamed.h @@ -36,7 +36,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 4d31451f92d..e72dd3a42f5 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -95,10 +95,11 @@ void SerializationNullable::serializeBinaryBulkStateSuffix( void SerializationNullable::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { settings.path.push_back(Substream::NullableElements); - nested->deserializeBinaryBulkStatePrefix(settings, state); + nested->deserializeBinaryBulkStatePrefix(settings, state, cache); settings.path.pop_back(); } @@ -286,7 +287,7 @@ bool SerializationNullable::tryDeserializeNullRaw(DB::ReadBuffer & istr, const D } template -ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization, bool & is_null) +ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization, bool & is_null) { static constexpr bool throw_exception = std::is_same_v; @@ -319,10 +320,10 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, /// Check if we have enough data in buffer to check if it's a null. if (istr.available() > null_representation.size()) { - auto check_for_null = [&null_representation](ReadBuffer & buf) + auto check_for_null = [&null_representation, &settings](ReadBuffer & buf) { auto * pos = buf.position(); - if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n')) + if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n' || (settings.tsv.crlf_end_of_line_input && *buf.position() == '\r'))) return true; buf.position() = pos; return false; @@ -334,14 +335,14 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, /// Use PeekableReadBuffer to make a checkpoint before checking null /// representation and rollback if check was failed. 
PeekableReadBuffer peekable_buf(istr, true); - auto check_for_null = [&null_representation](ReadBuffer & buf_) + auto check_for_null = [&null_representation, &settings](ReadBuffer & buf_) { auto & buf = assert_cast(buf_); buf.setCheckpoint(); SCOPE_EXIT(buf.dropCheckpoint()); - if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n')) - return true; + if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n' || (settings.tsv.crlf_end_of_line_input && *buf.position() == '\r'))) + return true; buf.rollbackToCheckpoint(); return false; }; @@ -371,7 +372,10 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos) throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation " - "containing '\\t' or '\\n' may not work correctly for large input."); + "containing '\\t' or '\\n' may not work correctly for large input."); + if (settings.tsv.crlf_end_of_line_input && null_representation.find('\r') != std::string::npos) + throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation " + "containing '\\r' may not work correctly for large input."); WriteBufferFromOwnString parsed_value; if constexpr (escaped) diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index 37858ccdefd..f7d2d2eadf0 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -29,7 +29,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index 67bf7af7799..c6c87b5aa7b 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -104,9 +104,9 @@ void SerializationObject::deserializeWholeText(IColumn & column, ReadBuf } template -void SerializationObject::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationObject::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextImpl(column, [&](String & s) { readEscapedString(s, istr); }); + deserializeTextImpl(column, [&](String & s) { settings.tsv.crlf_end_of_line_input ? 
readEscapedStringCRLF(s, istr) : readEscapedString(s, istr); }); } template @@ -210,7 +210,8 @@ void SerializationObject::serializeBinaryBulkStateSuffix( template void SerializationObject::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { checkSerializationIsSupported(settings); if (state) @@ -258,7 +259,7 @@ void SerializationObject::deserializeBinaryBulkStatePrefix( } settings.path.push_back(Substream::ObjectData); - state_object->nested_serialization->deserializeBinaryBulkStatePrefix(settings, state_object->nested_state); + state_object->nested_serialization->deserializeBinaryBulkStatePrefix(settings, state_object->nested_state, cache); settings.path.pop_back(); state = std::move(state_object); diff --git a/src/DataTypes/Serializations/SerializationObject.h b/src/DataTypes/Serializations/SerializationObject.h index 39e1c514640..4cb7d0ab6a8 100644 --- a/src/DataTypes/Serializations/SerializationObject.h +++ b/src/DataTypes/Serializations/SerializationObject.h @@ -41,7 +41,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationSparse.cpp b/src/DataTypes/Serializations/SerializationSparse.cpp index 4d7514271ad..73488d308bb 100644 --- a/src/DataTypes/Serializations/SerializationSparse.cpp +++ b/src/DataTypes/Serializations/SerializationSparse.cpp @@ -152,7 +152,7 @@ void SerializationSparse::enumerateStreams( const StreamCallback & callback, const SubstreamData & data) const { - const auto * column_sparse = data.column ? &assert_cast(*data.column) : nullptr; + const auto * column_sparse = data.column ? typeid_cast(data.column.get()) : nullptr; size_t column_size = column_sparse ? column_sparse->size() : 0; settings.path.push_back(Substream::SparseOffsets); @@ -170,7 +170,7 @@ void SerializationSparse::enumerateStreams( auto next_data = SubstreamData(nested) .withType(data.type) - .withColumn(column_sparse ? column_sparse->getValuesPtr() : nullptr) + .withColumn(column_sparse ? 
column_sparse->getValuesPtr() : data.column) .withSerializationInfo(data.serialization_info); nested->enumerateStreams(settings, callback, next_data); @@ -242,12 +242,13 @@ void SerializationSparse::serializeBinaryBulkStateSuffix( void SerializationSparse::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { auto state_sparse = std::make_shared(); settings.path.push_back(Substream::SparseElements); - nested->deserializeBinaryBulkStatePrefix(settings, state_sparse->nested); + nested->deserializeBinaryBulkStatePrefix(settings, state_sparse->nested, cache); settings.path.pop_back(); state = std::move(state_sparse); diff --git a/src/DataTypes/Serializations/SerializationSparse.h b/src/DataTypes/Serializations/SerializationSparse.h index b1ed7b613f0..a55856bacf0 100644 --- a/src/DataTypes/Serializations/SerializationSparse.h +++ b/src/DataTypes/Serializations/SerializationSparse.h @@ -43,7 +43,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; /// Allows to write ColumnSparse and other columns in sparse serialization. void serializeBinaryBulkWithMultipleStreams( diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp index 8abaa3bd5ea..9e39ab23709 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -147,7 +147,6 @@ void SerializationString::serializeBinaryBulk(const IColumn & column, WriteBuffe } } - template static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnString::Offsets & offsets, ReadBuffer & istr, size_t limit) { @@ -324,14 +323,17 @@ bool SerializationString::tryDeserializeWholeText(IColumn & column, ReadBuffer & return read(column, [&](ColumnString::Chars & data) { readStringUntilEOFInto(data, istr); return true; }); } -void SerializationString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - read(column, [&](ColumnString::Chars & data) { readEscapedStringInto(data, istr); }); + read(column, [&](ColumnString::Chars & data) + { + settings.tsv.crlf_end_of_line_input ? 
readEscapedStringInto,true>(data, istr) : readEscapedStringInto,false>(data, istr); + }); } bool SerializationString::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { - return read(column, [&](ColumnString::Chars & data) { readEscapedStringInto(data, istr); return true; }); + return read(column, [&](ColumnString::Chars & data) { readEscapedStringInto,true>(data, istr); return true; }); } void SerializationString::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 632a019d2d9..ef0a75fac40 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -549,26 +549,6 @@ bool SerializationTuple::tryDeserializeTextCSV(IColumn & column, ReadBuffer & is return tryDeserializeText(column, rb, settings, true); } -void SerializationTuple::enumerateStreams( - EnumerateStreamsSettings & settings, - const StreamCallback & callback, - const SubstreamData & data) const -{ - const auto * type_tuple = data.type ? &assert_cast(*data.type) : nullptr; - const auto * column_tuple = data.column ? &assert_cast(*data.column) : nullptr; - const auto * info_tuple = data.serialization_info ? &assert_cast(*data.serialization_info) : nullptr; - - for (size_t i = 0; i < elems.size(); ++i) - { - auto next_data = SubstreamData(elems[i]) - .withType(type_tuple ? type_tuple->getElement(i) : nullptr) - .withColumn(column_tuple ? column_tuple->getColumnPtr(i) : nullptr) - .withSerializationInfo(info_tuple ? info_tuple->getElementInfo(i) : nullptr); - - elems[i]->enumerateStreams(settings, callback, next_data); - } -} - struct SerializeBinaryBulkStateTuple : public ISerialization::SerializeBinaryBulkState { std::vector states; @@ -579,6 +559,27 @@ struct DeserializeBinaryBulkStateTuple : public ISerialization::DeserializeBinar std::vector states; }; +void SerializationTuple::enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const +{ + const auto * type_tuple = data.type ? &assert_cast(*data.type) : nullptr; + const auto * column_tuple = data.column ? &assert_cast(*data.column) : nullptr; + const auto * info_tuple = data.serialization_info ? &assert_cast(*data.serialization_info) : nullptr; + const auto * tuple_deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; + + for (size_t i = 0; i < elems.size(); ++i) + { + auto next_data = SubstreamData(elems[i]) + .withType(type_tuple ? type_tuple->getElement(i) : nullptr) + .withColumn(column_tuple ? column_tuple->getColumnPtr(i) : nullptr) + .withSerializationInfo(info_tuple ? info_tuple->getElementInfo(i) : nullptr) + .withDeserializeState(tuple_deserialize_state ? 
tuple_deserialize_state->states[i] : nullptr); + + elems[i]->enumerateStreams(settings, callback, next_data); + } +} void SerializationTuple::serializeBinaryBulkStatePrefix( const IColumn & column, @@ -606,13 +607,14 @@ void SerializationTuple::serializeBinaryBulkStateSuffix( void SerializationTuple::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { auto tuple_state = std::make_shared(); tuple_state->states.resize(elems.size()); for (size_t i = 0; i < elems.size(); ++i) - elems[i]->deserializeBinaryBulkStatePrefix(settings, tuple_state->states[i]); + elems[i]->deserializeBinaryBulkStatePrefix(settings, tuple_state->states[i], cache); state = std::move(tuple_state); } diff --git a/src/DataTypes/Serializations/SerializationTuple.h b/src/DataTypes/Serializations/SerializationTuple.h index d9c63a05217..810673d8b21 100644 --- a/src/DataTypes/Serializations/SerializationTuple.h +++ b/src/DataTypes/Serializations/SerializationTuple.h @@ -53,7 +53,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 300686ff8d3..b386fd8ab45 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -28,6 +28,16 @@ namespace ErrorCodes extern const int INCORRECT_DATA; } +struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState +{ + std::vector states; +}; + +struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState +{ + std::vector states; +}; + void SerializationVariant::enumerateStreams( EnumerateStreamsSettings & settings, const StreamCallback & callback, @@ -35,6 +45,7 @@ void SerializationVariant::enumerateStreams( { const auto * type_variant = data.type ? &assert_cast(*data.type) : nullptr; const auto * column_variant = data.column ? &assert_cast(*data.column) : nullptr; + const auto * variant_deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; auto discriminators_serialization = std::make_shared(std::make_shared>(), "discr", SubstreamType::NamedVariantDiscriminators); auto local_discriminators = column_variant ? column_variant->getLocalDiscriminatorsPtr() : nullptr; @@ -59,7 +70,8 @@ void SerializationVariant::enumerateStreams( auto variant_data = SubstreamData(variants[i]) .withType(type_variant ? type_variant->getVariant(i) : nullptr) .withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr) - .withSerializationInfo(data.serialization_info); + .withSerializationInfo(data.serialization_info) + .withDeserializeState(variant_deserialize_state ? 
variant_deserialize_state->states[i] : nullptr); addVariantElementToPath(settings.path, i); settings.path.back().data = variant_data; @@ -70,16 +82,6 @@ void SerializationVariant::enumerateStreams( settings.path.pop_back(); } -struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState -{ - std::vector states; -}; - -struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState -{ - std::vector states; -}; - void SerializationVariant::serializeBinaryBulkStatePrefix( const IColumn & column, SerializeBinaryBulkSettings & settings, @@ -123,7 +125,8 @@ void SerializationVariant::serializeBinaryBulkStateSuffix( void SerializationVariant::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { auto variant_state = std::make_shared(); variant_state->states.resize(variants.size()); @@ -132,7 +135,7 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i]); + variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i], cache); settings.path.pop_back(); } @@ -141,12 +144,13 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( } -void SerializationVariant::serializeBinaryBulkWithMultipleStreams( +void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics( const IColumn & column, size_t offset, size_t limit, SerializeBinaryBulkSettings & settings, - SerializeBinaryBulkStatePtr & state) const + SerializeBinaryBulkStatePtr & state, + std::unordered_map & variants_statistics) const { const ColumnVariant & col = assert_cast(column); if (const size_t size = col.size(); limit == 0 || offset + limit > size) @@ -185,6 +189,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( { addVariantElementToPath(settings.path, i); variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]); + variants_statistics[variant_names[i]] += col.getVariantByGlobalDiscriminator(i).size(); settings.path.pop_back(); } settings.path.pop_back(); @@ -205,6 +210,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( addVariantElementToPath(settings.path, non_empty_global_discr); /// We can use the same offset/limit as for whole Variant column variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]); + variants_statistics[variant_names[non_empty_global_discr]] += limit; settings.path.pop_back(); settings.path.pop_back(); return; @@ -244,12 +250,23 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( variant_offsets_and_limits[i].second, settings, variant_state->states[i]); + variants_statistics[variant_names[i]] += variant_offsets_and_limits[i].second; settings.path.pop_back(); } } settings.path.pop_back(); } +void SerializationVariant::serializeBinaryBulkWithMultipleStreams( + const DB::IColumn & column, + size_t offset, + size_t limit, + DB::ISerialization::SerializeBinaryBulkSettings & settings, + DB::ISerialization::SerializeBinaryBulkStatePtr & state) const +{ + std::unordered_map tmp_statistics; + 
serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(column, offset, limit, settings, state, tmp_statistics); +} void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, @@ -599,14 +616,14 @@ void SerializationVariant::serializeTextEscaped(const IColumn & column, size_t r bool SerializationVariant::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String field; - readEscapedString(field, istr); + settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(field, istr) : readEscapedString(field, istr); return tryDeserializeTextEscapedImpl(column, field, settings); } void SerializationVariant::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String field; - readEscapedString(field, istr); + settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(field, istr) : readEscapedString(field, istr); if (!tryDeserializeTextEscapedImpl(column, field, settings)) throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse escaped value of type {} here: {}", variant_name, field); } diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index 3f53dcf1339..b6aa1534538 100644 --- a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -59,7 +59,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, @@ -68,6 +69,14 @@ public: SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const override; + void serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state, + std::unordered_map & variants_statistics) const; + void deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, size_t limit, diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 7d4487fe6da..1f9a81ac671 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB { @@ -11,34 +12,6 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -void SerializationVariantElement::enumerateStreams( - DB::ISerialization::EnumerateStreamsSettings & settings, - const DB::ISerialization::StreamCallback & callback, - const DB::ISerialization::SubstreamData & data) const -{ - /// We will need stream for discriminators during deserialization. 
- settings.path.push_back(Substream::VariantDiscriminators); - callback(settings.path); - settings.path.pop_back(); - - addVariantToPath(settings.path); - settings.path.back().data = data; - nested_serialization->enumerateStreams(settings, callback, data); - removeVariantFromPath(settings.path); -} - -void SerializationVariantElement::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const -{ - throw Exception( - ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElement"); -} - -void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const -{ - throw Exception( - ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElement"); -} - struct DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState { /// During deserialization discriminators and variant streams can be shared. @@ -55,12 +28,47 @@ struct DeserializeBinaryBulkStateVariantElement : public ISerialization::Deseria ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; }; -void SerializationVariantElement::deserializeBinaryBulkStatePrefix(DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const +void SerializationVariantElement::enumerateStreams( + DB::ISerialization::EnumerateStreamsSettings & settings, + const DB::ISerialization::StreamCallback & callback, + const DB::ISerialization::SubstreamData & data) const +{ + /// We will need stream for discriminators during deserialization. + settings.path.push_back(Substream::VariantDiscriminators); + callback(settings.path); + settings.path.pop_back(); + + const auto * deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; + addVariantToPath(settings.path); + auto nested_data = SubstreamData(nested_serialization) + .withType(data.type ? removeNullableOrLowCardinalityNullable(data.type) : nullptr) + .withColumn(data.column ? removeNullableOrLowCardinalityNullable(data.column) : nullptr) + .withSerializationInfo(data.serialization_info) + .withDeserializeState(deserialize_state ? 
deserialize_state->variant_element_state : nullptr); + settings.path.back().data = nested_data; + nested_serialization->enumerateStreams(settings, callback, nested_data); + removeVariantFromPath(settings.path); +} + +void SerializationVariantElement::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElement"); +} + +void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElement"); +} + +void SerializationVariantElement::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { auto variant_element_state = std::make_shared(); addVariantToPath(settings.path); - nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state); + nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state, cache); removeVariantFromPath(settings.path); state = std::move(variant_element_state); diff --git a/src/DataTypes/Serializations/SerializationVariantElement.h b/src/DataTypes/Serializations/SerializationVariantElement.h index aafecf43d39..0ce0a72e250 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.h +++ b/src/DataTypes/Serializations/SerializationVariantElement.h @@ -43,7 +43,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, @@ -59,12 +60,6 @@ public: DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const override; -private: - friend SerializationVariant; - - void addVariantToPath(SubstreamPath & path) const; - void removeVariantFromPath(SubstreamPath & path) const; - struct VariantSubcolumnCreator : public ISubcolumnCreator { const ColumnPtr local_discriminators; @@ -82,6 +77,11 @@ private: ColumnPtr create(const ColumnPtr & prev) const override; SerializationPtr create(const SerializationPtr & prev) const override; }; +private: + friend SerializationVariant; + + void addVariantToPath(SubstreamPath & path) const; + void removeVariantFromPath(SubstreamPath & path) const; }; } diff --git a/src/DataTypes/Serializations/SerializationWrapper.cpp b/src/DataTypes/Serializations/SerializationWrapper.cpp index bde52bb8096..ecef533d7e0 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.cpp +++ b/src/DataTypes/Serializations/SerializationWrapper.cpp @@ -29,9 +29,10 @@ void SerializationWrapper::serializeBinaryBulkStateSuffix( void SerializationWrapper::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { - nested_serialization->deserializeBinaryBulkStatePrefix(settings, state); + nested_serialization->deserializeBinaryBulkStatePrefix(settings, state, cache); } void SerializationWrapper::serializeBinaryBulkWithMultipleStreams( diff --git 
a/src/DataTypes/Serializations/SerializationWrapper.h b/src/DataTypes/Serializations/SerializationWrapper.h index 6c5e2046062..882f17bba0a 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.h +++ b/src/DataTypes/Serializations/SerializationWrapper.h @@ -36,7 +36,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp b/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp index fc7432d5bf6..c6337a31fce 100644 --- a/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp +++ b/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp @@ -49,7 +49,7 @@ TEST(SerializationObject, FromString) settings.position_independent_encoding = false; settings.getter = [&in](const auto &) { return ∈ }; - serialization->deserializeBinaryBulkStatePrefix(settings, state); + serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr); serialization->deserializeBinaryBulkWithMultipleStreams(result_column, column_string->size(), settings, state, nullptr); } diff --git a/src/DataTypes/Utils.cpp b/src/DataTypes/Utils.cpp index 2f29d57d454..e7e69e379af 100644 --- a/src/DataTypes/Utils.cpp +++ b/src/DataTypes/Utils.cpp @@ -224,6 +224,7 @@ bool canBeSafelyCasted(const DataTypePtr & from_type, const DataTypePtr & to_typ case TypeIndex::Nothing: case TypeIndex::JSONPaths: case TypeIndex::Variant: + case TypeIndex::Dynamic: return false; } diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 0977bea362c..a71b19d6c92 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB @@ -256,6 +257,24 @@ DataTypePtr getLeastSupertype(const DataTypes & types) return types[0]; } + /// If one of the types is Dynamic, the supertype is Dynamic + { + bool have_dynamic = false; + size_t max_dynamic_types = 0; + + for (const auto & type : types) + { + if (const auto & dynamic_type = typeid_cast(type.get())) + { + have_dynamic = true; + max_dynamic_types = std::max(max_dynamic_types, dynamic_type->getMaxDynamicTypes()); + } + } + + if (have_dynamic) + return std::make_shared(max_dynamic_types); + } + /// Recursive rules /// If there are Nothing types, skip them diff --git a/src/Databases/DatabaseHDFS.cpp b/src/Databases/DatabaseHDFS.cpp index 1de7f80f512..060991d1290 100644 --- a/src/Databases/DatabaseHDFS.cpp +++ b/src/Databases/DatabaseHDFS.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index fb1b3ee626b..e72834eddbe 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -1,6 +1,14 @@ +#include + +#include +#include +#include +#include +#include +#include +#include #include #include -#include #include #include #include @@ -10,13 +18,7 @@ #include #include #include -#include -#include -#include -#include -#include -#include namespace fs = std::filesystem; diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 161be35f129..5cb4198e1a2 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -5,6 
+5,7 @@ #include #include #include +#include #include #include #include @@ -326,31 +327,36 @@ void DatabaseOnDisk::dropTable(ContextPtr local_context, const String & table_na StoragePtr table = detachTable(local_context, table_name); - /// This is possible for Lazy database. - if (!table) - return; - bool renamed = false; try { fs::rename(table_metadata_path, table_metadata_path_drop); renamed = true; - table->drop(); - table->is_dropped = true; - - fs::path table_data_dir(local_context->getPath() + table_data_path_relative); - if (fs::exists(table_data_dir)) - (void)fs::remove_all(table_data_dir); + // The table might be not loaded for Lazy database engine. + if (table) + { + table->drop(); + table->is_dropped = true; + } } catch (...) { LOG_WARNING(log, getCurrentExceptionMessageAndPattern(/* with_stacktrace */ true)); - attachTable(local_context, table_name, table, table_data_path_relative); + if (table) + attachTable(local_context, table_name, table, table_data_path_relative); if (renamed) fs::rename(table_metadata_path_drop, table_metadata_path); throw; } + for (const auto & [disk_name, disk] : getContext()->getDisksMap()) + { + if (disk->isReadOnly() || !disk->exists(table_data_path_relative)) + continue; + + LOG_INFO(log, "Removing data directory from disk {} with path {} for dropped table {} ", disk_name, table_data_path_relative, table_name); + disk->removeRecursive(table_data_path_relative); + } (void)fs::remove(table_metadata_path_drop); } diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 58fa7f01947..5d36f1cc3d6 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -76,20 +76,6 @@ static void setReplicatedEngine(ASTCreateQuery * create_query, ContextPtr contex String replica_path = server_settings.default_replica_path; String replica_name = server_settings.default_replica_name; - /// Check that replica path doesn't exist - Macros::MacroExpansionInfo info; - StorageID table_id = StorageID(create_query->getDatabase(), create_query->getTable(), create_query->uuid); - info.table_id = table_id; - info.expand_special_macros_only = false; - - String zookeeper_path = context->getMacros()->expand(replica_path, info); - if (context->getZooKeeper()->exists(zookeeper_path)) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Found existing ZooKeeper path {} while trying to convert table {} to replicated. 
Table will not be converted.", - zookeeper_path, backQuote(table_id.getFullTableName()) - ); - auto args = std::make_shared(); args->children.push_back(std::make_shared(replica_path)); args->children.push_back(std::make_shared(replica_name)); diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 78d502ec2c7..cc946fc22c4 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -929,6 +929,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep query_context->setSetting("allow_experimental_hash_functions", 1); query_context->setSetting("allow_experimental_object_type", 1); query_context->setSetting("allow_experimental_variant_type", 1); + query_context->setSetting("allow_experimental_dynamic_type", 1); query_context->setSetting("allow_experimental_annoy_index", 1); query_context->setSetting("allow_experimental_usearch_index", 1); query_context->setSetting("allow_experimental_bigint_types", 1); diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index fc75f8e44b9..5fee14ecc2a 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -1,4 +1,10 @@ #include + +#include +#include +#include +#include +#include #include #include #include @@ -8,17 +14,8 @@ #include #include #include -#include -#include -#include +#include #include -#include -#include - -namespace CurrentMetrics -{ - extern const Metric AttachedTable; -} namespace DB @@ -263,7 +260,7 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n res = it->second; tables.erase(it); res->is_detached = true; - CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); auto table_id = res->getStorageID(); if (table_id.hasUUID()) @@ -304,7 +301,7 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// It is important to reset is_detached here since in case of RENAME in /// non-Atomic database the is_detached is set to true before RENAME. 
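The attach/detach paths in `DatabasesCommon.cpp` stop hard-coding `CurrentMetrics::AttachedTable` and instead ask `getAttachedCounterForStorage` which counter to adjust. A rough standalone sketch of that idea follows; the storage kinds and counter names in it are assumptions made for illustration, and only the symmetric add-on-attach / sub-on-detach pattern mirrors the change itself.

```cpp
// Rough model: pick one counter per storage kind, then adjust it symmetrically.
#include <atomic>
#include <cstdint>

enum class StorageKind { Table, View, Dictionary };

struct AttachedCounters
{
    std::atomic<int64_t> tables{0};
    std::atomic<int64_t> views{0};
    std::atomic<int64_t> dictionaries{0};

    std::atomic<int64_t> & forKind(StorageKind kind)
    {
        switch (kind)
        {
            case StorageKind::View:       return views;
            case StorageKind::Dictionary: return dictionaries;
            case StorageKind::Table:      return tables;
        }
        return tables;  // unreachable; silences "no return" warnings
    }

    void onAttach(StorageKind kind) { forKind(kind).fetch_add(1); }
    void onDetach(StorageKind kind) { forKind(kind).fetch_sub(1); }
};

int main()
{
    AttachedCounters counters;
    counters.onAttach(StorageKind::View);
    counters.onAttach(StorageKind::Table);
    counters.onDetach(StorageKind::View);
    return (counters.views.load() == 0 && counters.tables.load() == 1) ? 0 : 1;
}
```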
table->is_detached = false; - CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::add(getAttachedCounterForStorage(table), 1); } void DatabaseWithOwnTablesBase::shutdown() diff --git a/src/Dictionaries/CacheDictionaryStorage.h b/src/Dictionaries/CacheDictionaryStorage.h index 01217c58e31..a960a916027 100644 --- a/src/Dictionaries/CacheDictionaryStorage.h +++ b/src/Dictionaries/CacheDictionaryStorage.h @@ -754,7 +754,7 @@ private: std::vector attributes; - inline void setCellDeadline(Cell & cell, TimePoint now) + void setCellDeadline(Cell & cell, TimePoint now) { if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0) { @@ -774,7 +774,7 @@ private: cell.deadline = std::chrono::system_clock::to_time_t(deadline); } - inline size_t getCellIndex(const KeyType key) const + size_t getCellIndex(const KeyType key) const { const size_t hash = DefaultHash()(key); const size_t index = hash & size_overlap_mask; @@ -783,7 +783,7 @@ private: using KeyStateAndCellIndex = std::pair; - inline KeyStateAndCellIndex getKeyStateAndCellIndex(const KeyType key, const time_t now) const + KeyStateAndCellIndex getKeyStateAndCellIndex(const KeyType key, const time_t now) const { size_t place_value = getCellIndex(key); const size_t place_value_end = place_value + max_collision_length; @@ -810,7 +810,7 @@ private: return std::make_pair(KeyState::not_found, place_value & size_overlap_mask); } - inline size_t getCellIndexForInsert(const KeyType & key) const + size_t getCellIndexForInsert(const KeyType & key) const { size_t place_value = getCellIndex(key); const size_t place_value_end = place_value + max_collision_length; diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 8bf190d3edc..64fc05e99ab 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -44,7 +44,7 @@ public: { } - inline bool isConstant() const { return default_values_column == nullptr; } + bool isConstant() const { return default_values_column == nullptr; } Field getDefaultValue(size_t row) const { @@ -450,17 +450,17 @@ public: keys_size = key_columns.front()->size(); } - inline size_t getKeysSize() const + size_t getKeysSize() const { return keys_size; } - inline size_t getCurrentKeyIndex() const + size_t getCurrentKeyIndex() const { return current_key_index; } - inline KeyType extractCurrentKey() + KeyType extractCurrentKey() { assert(current_key_index < keys_size); diff --git a/src/Dictionaries/Embedded/RegionsNames.h b/src/Dictionaries/Embedded/RegionsNames.h index 0053c74745a..0e4c1fe8b88 100644 --- a/src/Dictionaries/Embedded/RegionsNames.h +++ b/src/Dictionaries/Embedded/RegionsNames.h @@ -48,14 +48,14 @@ public: }; private: - static inline constexpr const char * languages[] = + static constexpr const char * languages[] = { #define M(NAME, FALLBACK, NUM) #NAME, FOR_EACH_LANGUAGE(M) #undef M }; - static inline constexpr Language fallbacks[] = + static constexpr Language fallbacks[] = { #define M(NAME, FALLBACK, NUM) Language::FALLBACK, FOR_EACH_LANGUAGE(M) diff --git a/src/Dictionaries/ICacheDictionaryStorage.h b/src/Dictionaries/ICacheDictionaryStorage.h index dcd7434946f..532154cd190 100644 --- a/src/Dictionaries/ICacheDictionaryStorage.h +++ b/src/Dictionaries/ICacheDictionaryStorage.h @@ -26,15 +26,15 @@ struct KeyState : state(state_) {} - inline bool isFound() const { return state == State::found; } - inline bool isExpired() const { return state == State::expired; } - inline bool isNotFound() const { return state == 
State::not_found; } - inline bool isDefault() const { return is_default; } - inline void setDefault() { is_default = true; } - inline void setDefaultValue(bool is_default_value) { is_default = is_default_value; } + bool isFound() const { return state == State::found; } + bool isExpired() const { return state == State::expired; } + bool isNotFound() const { return state == State::not_found; } + bool isDefault() const { return is_default; } + void setDefault() { is_default = true; } + void setDefaultValue(bool is_default_value) { is_default = is_default_value; } /// Valid only if keyState is found or expired - inline size_t getFetchedColumnIndex() const { return fetched_column_index; } - inline void setFetchedColumnIndex(size_t fetched_column_index_value) { fetched_column_index = fetched_column_index_value; } + size_t getFetchedColumnIndex() const { return fetched_column_index; } + void setFetchedColumnIndex(size_t fetched_column_index_value) { fetched_column_index = fetched_column_index_value; } private: State state = not_found; size_t fetched_column_index = 0; diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index 1bc6d16c932..a67118caaf8 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -66,7 +66,7 @@ namespace return buf; } - inline UInt8 prefixIPv6() const + UInt8 prefixIPv6() const { return isv6 ? prefix : prefix + 96; } diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 2e93a8e6001..ab999202e42 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -474,7 +474,7 @@ public: } // Checks if no more values can be added for a given attribute - inline bool full(const String & attr_name, std::unordered_set * const defaults = nullptr) const + bool full(const String & attr_name, std::unordered_set * const defaults = nullptr) const { if (collect_values_limit) { @@ -490,7 +490,7 @@ public: } // Returns the number of full attributes - inline size_t attributesFull() const { return n_full_attributes; } + size_t attributesFull() const { return n_full_attributes; } }; std::pair processBackRefs(const String & data, const re2::RE2 & searcher, const std::vector & pieces) diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index f0b56cbf529..e96bdc4ac55 100644 --- a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -134,7 +134,7 @@ public: /// Reset block with new block_data /// block_data must be filled with zeroes if it is new block - inline void reset(char * new_block_data) + void reset(char * new_block_data) { block_data = new_block_data; current_block_offset = block_header_size; @@ -142,13 +142,13 @@ public: } /// Check if it is enough place to write key in block - inline bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const + bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const { return (current_block_offset + (sizeof(cache_key.key) + sizeof(cache_key.size) + cache_key.size)) <= block_size; } /// Check if it is enough place to write key in block - inline bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const + bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const { const StringRef & key = cache_key.key; size_t complex_key_size = sizeof(key.size) + key.size; @@ -159,7 +159,7 @@ public: /// Write key and returns offset in ssd 
cache block where data is written /// It is client responsibility to check if there is enough place in block to write key /// Returns true if key was written and false if there was not enough place to write key - inline bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block) + bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block) { assert(cache_key.size > 0); @@ -188,7 +188,7 @@ public: return true; } - inline bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block) + bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block) { assert(cache_key.size > 0); @@ -223,20 +223,20 @@ public: return true; } - inline size_t getKeysSize() const { return keys_size; } + size_t getKeysSize() const { return keys_size; } /// Write keys size into block header - inline void writeKeysSize() + void writeKeysSize() { char * keys_size_offset_data = block_data + block_header_check_sum_size; std::memcpy(keys_size_offset_data, &keys_size, sizeof(size_t)); } /// Get check sum from block header - inline size_t getCheckSum() const { return unalignedLoad(block_data); } + size_t getCheckSum() const { return unalignedLoad(block_data); } /// Calculate check sum in block - inline size_t calculateCheckSum() const + size_t calculateCheckSum() const { size_t calculated_check_sum = static_cast(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size)); @@ -244,7 +244,7 @@ public: } /// Check if check sum from block header matched calculated check sum in block - inline bool checkCheckSum() const + bool checkCheckSum() const { size_t calculated_check_sum = calculateCheckSum(); size_t check_sum = getCheckSum(); @@ -253,16 +253,16 @@ public: } /// Write check sum in block header - inline void writeCheckSum() + void writeCheckSum() { size_t check_sum = static_cast(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size)); std::memcpy(block_data, &check_sum, sizeof(size_t)); } - inline size_t getBlockSize() const { return block_size; } + size_t getBlockSize() const { return block_size; } /// Returns block data - inline char * getBlockData() const { return block_data; } + char * getBlockData() const { return block_data; } /// Read keys that were serialized in block /// It is client responsibility to ensure that simple or complex keys were written in block @@ -405,16 +405,16 @@ public: current_write_block.writeCheckSum(); } - inline char * getPlace(SSDCacheIndex index) const + char * getPlace(SSDCacheIndex index) const { return buffer.m_data + index.block_index * block_size + index.offset_in_block; } - inline size_t getCurrentBlockIndex() const { return current_block_index; } + size_t getCurrentBlockIndex() const { return current_block_index; } - inline const char * getData() const { return buffer.m_data; } + const char * getData() const { return buffer.m_data; } - inline size_t getSizeInBytes() const { return block_size * partition_blocks_size; } + size_t getSizeInBytes() const { return block_size * partition_blocks_size; } void readKeys(PaddedPODArray & keys) const { @@ -431,7 +431,7 @@ public: } } - inline void reset() + void reset() { current_block_index = 0; current_write_block.reset(buffer.m_data); @@ -750,9 +750,9 @@ public: } } - inline size_t getCurrentBlockIndex() const { return current_block_index; } + size_t getCurrentBlockIndex() const { return current_block_index; } - inline void reset() + void reset() { current_block_index = 0; 
} @@ -788,7 +788,7 @@ private: int fd = -1; }; - inline static int preallocateDiskSpace(int fd, size_t offset, size_t len) + static int preallocateDiskSpace(int fd, size_t offset, size_t len) { #if defined(OS_FREEBSD) return posix_fallocate(fd, offset, len); @@ -797,7 +797,7 @@ private: #endif } - inline static char * getRequestBuffer(const iocb & request) + static char * getRequestBuffer(const iocb & request) { char * result = nullptr; @@ -810,7 +810,7 @@ private: return result; } - inline static ssize_t eventResult(io_event & event) + static ssize_t eventResult(io_event & event) { ssize_t bytes_written; @@ -985,9 +985,9 @@ private: size_t in_memory_partition_index; CellState state; - inline bool isInMemory() const { return state == in_memory; } - inline bool isOnDisk() const { return state == on_disk; } - inline bool isDefaultValue() const { return state == default_value; } + bool isInMemory() const { return state == in_memory; } + bool isOnDisk() const { return state == on_disk; } + bool isDefaultValue() const { return state == default_value; } }; struct KeyToBlockOffset @@ -1366,7 +1366,7 @@ private: } } - inline void setCellDeadline(Cell & cell, TimePoint now) + void setCellDeadline(Cell & cell, TimePoint now) { if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0) { @@ -1383,7 +1383,7 @@ private: cell.deadline = std::chrono::system_clock::to_time_t(deadline); } - inline void eraseKeyFromIndex(KeyType key) + void eraseKeyFromIndex(KeyType key) { auto it = index.find(key); diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index 27000dcc8af..9b575c65bce 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -350,6 +350,13 @@ public: return delegate; } +#if USE_AWS_S3 + std::shared_ptr getS3StorageClient() const override + { + return delegate->getS3StorageClient(); + } +#endif + private: String wrappedPath(const String & path) const { diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 614fe413503..658acb01c74 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -116,13 +115,18 @@ public: /// Default constructor. 
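`DiskEncrypted` above simply forwards `getS3StorageClient()` to its wrapped disk, while `IDisk` below gains a default implementation that throws for disk types without an S3 client. A minimal sketch of that base-default-plus-forwarding pattern, using placeholder types rather than the real `IDisk`/S3 classes:

```cpp
// Base interface provides a throwing default, concrete backends override it,
// and wrapper/decorator disks forward to their delegate.
#include <memory>
#include <stdexcept>
#include <string>

struct S3ClientStub {};  // stand-in for the real S3 client type

class DiskBase
{
public:
    virtual ~DiskBase() = default;
    virtual std::string name() const = 0;

    // Default: most disk types have no S3 client to expose.
    virtual std::shared_ptr<S3ClientStub> getS3StorageClient() const
    {
        throw std::logic_error("getS3StorageClient() is not implemented for disk type: " + name());
    }
};

class S3Disk : public DiskBase
{
public:
    std::string name() const override { return "s3"; }
    std::shared_ptr<S3ClientStub> getS3StorageClient() const override { return client; }

private:
    std::shared_ptr<S3ClientStub> client = std::make_shared<S3ClientStub>();
};

// A wrapping disk (encryption, cache, ...) forwards to the wrapped disk,
// so the client stays reachable through any number of layers.
class WrappingDisk : public DiskBase
{
public:
    explicit WrappingDisk(std::shared_ptr<DiskBase> delegate_) : delegate(std::move(delegate_)) {}

    std::string name() const override { return "wrapped(" + delegate->name() + ")"; }
    std::shared_ptr<S3ClientStub> getS3StorageClient() const override { return delegate->getS3StorageClient(); }

private:
    std::shared_ptr<DiskBase> delegate;
};

int main()
{
    WrappingDisk disk(std::make_shared<S3Disk>());
    return disk.getS3StorageClient() ? 0 : 1;
}
```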
IDisk(const String & name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) : name(name_) - , copying_thread_pool(CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, CurrentMetrics::IDiskCopierThreadsScheduled, config.getUInt(config_prefix + ".thread_pool_size", 16)) + , copying_thread_pool( + CurrentMetrics::IDiskCopierThreads, + CurrentMetrics::IDiskCopierThreadsActive, + CurrentMetrics::IDiskCopierThreadsScheduled, + config.getUInt(config_prefix + ".thread_pool_size", 16)) { } explicit IDisk(const String & name_) : name(name_) - , copying_thread_pool(CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, CurrentMetrics::IDiskCopierThreadsScheduled, 16) + , copying_thread_pool( + CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, CurrentMetrics::IDiskCopierThreadsScheduled, 16) { } @@ -466,6 +470,17 @@ public: virtual DiskPtr getDelegateDiskIfExists() const { return nullptr; } +#if USE_AWS_S3 + virtual std::shared_ptr getS3StorageClient() const + { + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Method getS3StorageClient() is not implemented for disk type: {}", + getDataSourceDescription().toString()); + } +#endif + + protected: friend class DiskDecorator; diff --git a/src/Disks/IO/IOUringReader.h b/src/Disks/IO/IOUringReader.h index 89e71e4b215..359b3badc45 100644 --- a/src/Disks/IO/IOUringReader.h +++ b/src/Disks/IO/IOUringReader.h @@ -61,12 +61,12 @@ private: void monitorRing(); - template inline void failPromise(std::promise & promise, const Exception & ex) + template void failPromise(std::promise & promise, const Exception & ex) { promise.set_exception(std::make_exception_ptr(ex)); } - inline std::future makeFailedResult(const Exception & ex) + std::future makeFailedResult(const Exception & ex) { auto promise = std::promise{}; failPromise(promise, ex); diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 96ba6acefff..3da6d843991 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include namespace Poco { diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index bee8e206ec4..e7ecf7cd515 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -79,14 +79,14 @@ private: for (const auto & blob : blobs_list) { - batch.emplace_back( + batch.emplace_back(std::make_shared( blob.Name, ObjectMetadata{ static_cast(blob.BlobSize), Poco::Timestamp::fromEpochTime( std::chrono::duration_cast( static_cast(blob.Details.LastModified).time_since_epoch()).count()), - {}}); + {}})); } if (!blob_list_response.NextPageToken.HasValue() || blob_list_response.NextPageToken.Value().empty()) @@ -148,15 +148,15 @@ bool AzureObjectStorage::exists(const StoredObject & object) const return false; } -ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_prefix) const +ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const { auto settings_ptr = settings.get(); auto client_ptr = client.get(); - return std::make_shared(path_prefix, client_ptr, settings_ptr->list_object_keys_size); + return std::make_shared(path_prefix, client_ptr, max_keys); } -void 
AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const +void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const { auto client_ptr = client.get(); @@ -179,19 +179,19 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith for (const auto & blob : blobs_list) { - children.emplace_back( + children.emplace_back(std::make_shared( blob.Name, ObjectMetadata{ static_cast(blob.BlobSize), Poco::Timestamp::fromEpochTime( std::chrono::duration_cast( static_cast(blob.Details.LastModified).time_since_epoch()).count()), - {}}); + {}})); } if (max_keys) { - int keys_left = max_keys - static_cast(children.size()); + size_t keys_left = max_keys - children.size(); if (keys_left <= 0) break; options.PageSizeHint = keys_left; @@ -346,10 +346,11 @@ void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects) { auto client_ptr = client.get(); for (const auto & object : objects) + { removeObjectImpl(object, client_ptr, true); + } } - ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) const { auto client_ptr = client.get(); @@ -366,9 +367,9 @@ ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) c { result.attributes.emplace(); for (const auto & [key, value] : properties.Metadata) - (*result.attributes)[key] = value; + result.attributes[key] = value; } - result.last_modified.emplace(static_cast(properties.LastModified).time_since_epoch().count()); + result.last_modified = static_cast(properties.LastModified).time_since_epoch().count(); return result; } @@ -397,7 +398,9 @@ void AzureObjectStorage::copyObject( /// NOLINT dest_blob_client.CopyFromUri(source_blob_client.GetUrl(), copy_options); } -void AzureObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +void AzureObjectStorage::applyNewSettings( + const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, + ContextPtr context, const ApplyNewSettingsOptions &) { auto new_settings = getAzureBlobStorageSettings(config, config_prefix, context); settings.set(std::move(new_settings)); diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index c3062def763..8ead696cf78 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -85,9 +85,9 @@ public: const String & object_namespace_, const String & description_); - void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override; + void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override; - ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const override; + ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override; std::string getName() const override { return "AzureObjectStorage"; } @@ -144,7 +144,8 @@ public: void applyNewSettings( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - ContextPtr context) override; + ContextPtr context, + const ApplyNewSettingsOptions & options) override; String getObjectsNamespace() const override { return object_namespace ; } diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp 
b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index e3ab772e3b5..f2f33684fde 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -176,7 +176,7 @@ std::unique_ptr CachedObjectStorage::cloneObjectStorage( return object_storage->cloneObjectStorage(new_namespace, config, config_prefix, context); } -void CachedObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const +void CachedObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const { object_storage->listObjects(path, children, max_keys); } @@ -192,9 +192,10 @@ void CachedObjectStorage::shutdown() } void CachedObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) + const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, + ContextPtr context, const ApplyNewSettingsOptions & options) { - object_storage->applyNewSettings(config, config_prefix, context); + object_storage->applyNewSettings(config, config_prefix, context, options); } String CachedObjectStorage::getObjectsNamespace() const diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 961c2709efc..f06f78fbe4a 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -80,7 +80,7 @@ public: const std::string & config_prefix, ContextPtr context) override; - void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override; + void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override; ObjectMetadata getObjectMetadata(const std::string & path) const override; @@ -91,7 +91,8 @@ public: void applyNewSettings( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - ContextPtr context) override; + ContextPtr context, + const ApplyNewSettingsOptions & options) override; String getObjectsNamespace() const override; @@ -126,6 +127,13 @@ public: } #endif +#if USE_AWS_S3 + std::shared_ptr getS3StorageClient() override + { + return object_storage->getS3StorageClient(); + } +#endif + private: FileCacheKey getCacheKey(const std::string & path) const; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index d4ff9bc0b79..5803a985000 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -544,7 +544,7 @@ void DiskObjectStorage::applyNewSettings( { /// FIXME we cannot use config_prefix that was passed through arguments because the disk may be wrapped with cache and we need another name const auto config_prefix = "storage_configuration.disks." 
+ name; - object_storage->applyNewSettings(config, config_prefix, context_); + object_storage->applyNewSettings(config, config_prefix, context_, IObjectStorage::ApplyNewSettingsOptions{ .allow_client_change = true }); { std::unique_lock lock(resource_mutex); @@ -582,6 +582,12 @@ UInt64 DiskObjectStorage::getRevision() const return metadata_helper->getRevision(); } +#if USE_AWS_S3 +std::shared_ptr DiskObjectStorage::getS3StorageClient() const +{ + return object_storage->getS3StorageClient(); +} +#endif DiskPtr DiskObjectStorageReservation::getDisk(size_t i) const { diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 2a27ddf89a7..ffef0a007da 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -6,6 +6,8 @@ #include #include +#include "config.h" + namespace CurrentMetrics { @@ -210,6 +212,10 @@ public: bool supportsChmod() const override { return metadata_storage->supportsChmod(); } void chmod(const String & path, mode_t mode) override; +#if USE_AWS_S3 + std::shared_ptr getS3StorageClient() const override; +#endif + private: /// Create actual disk object storage transaction for operations diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp index 18a0377efe7..701c08b9a14 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp @@ -364,18 +364,18 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage * for (const auto & object : objects) { - LOG_INFO(disk->log, "Calling restore for key for disk {}", object.relative_path); + LOG_INFO(disk->log, "Calling restore for key for disk {}", object->relative_path); /// Skip file operations objects. They will be processed separately. - if (object.relative_path.find("/operations/") != String::npos) + if (object->relative_path.find("/operations/") != String::npos) continue; - const auto [revision, _] = extractRevisionAndOperationFromKey(object.relative_path); + const auto [revision, _] = extractRevisionAndOperationFromKey(object->relative_path); /// Filter early if it's possible to get revision from key. if (revision > restore_information.revision) continue; - keys_names.push_back(object.relative_path); + keys_names.push_back(object->relative_path); } if (!keys_names.empty()) @@ -405,26 +405,20 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::processRestoreFiles( { for (const auto & key : keys) { - auto meta = source_object_storage->getObjectMetadata(key); - auto object_attributes = meta.attributes; + auto metadata = source_object_storage->getObjectMetadata(key); + auto object_attributes = metadata.attributes; String path; - if (object_attributes.has_value()) + /// Restore file if object has 'path' in metadata. + auto path_entry = object_attributes.find("path"); + if (path_entry == object_attributes.end()) { - /// Restore file if object has 'path' in metadata. - auto path_entry = object_attributes->find("path"); - if (path_entry == object_attributes->end()) - { - /// Such keys can remain after migration, we can skip them. - LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key); - continue; - } - - path = path_entry->second; - } - else + /// Such keys can remain after migration, we can skip them. 
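The restore loop in `processRestoreFiles` gets simpler because the object attributes appear to be a plain map here rather than an optional one, so "does this key carry a path?" becomes a single `find()`. A tiny standalone illustration of that shape (the struct below is a stripped-down stand-in, not the real `ObjectMetadata`):

```cpp
// Once `attributes` is a plain map, the 'path' lookup collapses to one find().
#include <map>
#include <optional>
#include <string>

struct ObjectMetadata
{
    std::map<std::string, std::string> attributes;  // previously wrapped in std::optional
};

std::optional<std::string> extractRestorePath(const ObjectMetadata & metadata)
{
    auto it = metadata.attributes.find("path");
    if (it == metadata.attributes.end())
        return std::nullopt;   // e.g. keys left over from a migration: skip them
    return it->second;
}

int main()
{
    ObjectMetadata with_path{.attributes = {{"path", "data/db/table/file.bin"}}};
    ObjectMetadata without_path{};
    return (extractRestorePath(with_path).has_value() && !extractRestorePath(without_path)) ? 0 : 1;
}
```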
+ LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key); continue; + } + path = path_entry->second; disk->createDirectories(directoryPath(path)); auto object_key = ObjectStorageKey::createAsRelative(disk->object_key_prefix, shrinkKey(source_path, key)); @@ -436,7 +430,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::processRestoreFiles( source_object_storage->copyObjectToAnotherObjectStorage(object_from, object_to, read_settings, write_settings, *disk->object_storage); auto tx = disk->metadata_storage->createTransaction(); - tx->addBlobToMetadata(path, object_key, meta.size_bytes); + tx->addBlobToMetadata(path, object_key, metadata.size_bytes); tx->commit(); LOG_TRACE(disk->log, "Restored file {}", path); @@ -475,10 +469,10 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject for (const auto & object : objects) { - const auto [revision, operation] = extractRevisionAndOperationFromKey(object.relative_path); + const auto [revision, operation] = extractRevisionAndOperationFromKey(object->relative_path); if (revision == UNKNOWN_REVISION) { - LOG_WARNING(disk->log, "Skip key {} with unknown revision", object.relative_path); + LOG_WARNING(disk->log, "Skip key {} with unknown revision", object->relative_path); continue; } @@ -491,7 +485,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject if (send_metadata) revision_counter = revision - 1; - auto object_attributes = *(source_object_storage->getObjectMetadata(object.relative_path).attributes); + auto object_attributes = source_object_storage->getObjectMetadata(object->relative_path).attributes; if (operation == rename) { auto from_path = object_attributes["from_path"]; diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index e717c88ed22..dcb2af9d4d3 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -1,12 +1,13 @@ #include #include -#include -#include +#include +#include -#include +#include #include #include +#include #if USE_HDFS @@ -18,28 +19,57 @@ namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; extern const int HDFS_ERROR; + extern const int ACCESS_DENIED; + extern const int LOGICAL_ERROR; } -void HDFSObjectStorage::shutdown() +void HDFSObjectStorage::initializeHDFSFS() const { + if (initialized) + return; + + std::lock_guard lock(init_mutex); + if (initialized) + return; + + hdfs_builder = createHDFSBuilder(url, config); + hdfs_fs = createHDFSFS(hdfs_builder.get()); + initialized = true; } -void HDFSObjectStorage::startup() +std::string HDFSObjectStorage::extractObjectKeyFromURL(const StoredObject & object) const { + /// This is very unfortunate, but for disk HDFS we made a mistake + /// and now its behaviour is inconsistent with S3 and Azure disks. + /// The mistake is that for HDFS we write into metadata files whole URL + data directory + key, + /// while for S3 and Azure we write there only data_directory + key. + /// This leads us into ambiguity that for StorageHDFS we have just key in object.remote_path, + /// but for DiskHDFS we have there URL as well. 
+ auto path = object.remote_path; + if (path.starts_with(url)) + path = path.substr(url.size()); + if (path.starts_with("/")) + path = path.substr(1); + return path; } ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const { + initializeHDFSFS(); /// whatever data_source_description.description value is, consider that key as relative key - return ObjectStorageKey::createAsRelative(hdfs_root_path, getRandomASCIIString(32)); + chassert(data_directory.starts_with("/")); + return ObjectStorageKey::createAsRelative( + fs::path(url_without_path) / data_directory.substr(1), getRandomASCIIString(32)); } bool HDFSObjectStorage::exists(const StoredObject & object) const { - const auto & path = object.remote_path; - const size_t begin_of_path = path.find('/', path.find("//") + 2); - const String remote_fs_object_path = path.substr(begin_of_path); - return (0 == hdfsExists(hdfs_fs.get(), remote_fs_object_path.c_str())); + initializeHDFSFS(); + std::string path = object.remote_path; + if (path.starts_with(url_without_path)) + path = path.substr(url_without_path.size()); + + return (0 == hdfsExists(hdfs_fs.get(), path.c_str())); } std::unique_ptr HDFSObjectStorage::readObject( /// NOLINT @@ -48,7 +78,10 @@ std::unique_ptr HDFSObjectStorage::readObject( /// NOLIN std::optional, std::optional) const { - return std::make_unique(object.remote_path, object.remote_path, config, patchSettings(read_settings)); + initializeHDFSFS(); + auto path = extractObjectKeyFromURL(object); + return std::make_unique( + fs::path(url_without_path) / "", fs::path(data_directory) / path, config, patchSettings(read_settings)); } std::unique_ptr HDFSObjectStorage::readObjects( /// NOLINT @@ -57,18 +90,15 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI std::optional, std::optional) const { + initializeHDFSFS(); auto disk_read_settings = patchSettings(read_settings); auto read_buffer_creator = [this, disk_read_settings] (bool /* restricted_seek */, const StoredObject & object_) -> std::unique_ptr { - const auto & path = object_.remote_path; - size_t begin_of_path = path.find('/', path.find("//") + 2); - auto hdfs_path = path.substr(begin_of_path); - auto hdfs_uri = path.substr(0, begin_of_path); - + auto path = extractObjectKeyFromURL(object_); return std::make_unique( - hdfs_uri, hdfs_path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true); + fs::path(url_without_path) / "", fs::path(data_directory) / path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true); }; return std::make_unique( @@ -82,14 +112,21 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL size_t buf_size, const WriteSettings & write_settings) { + initializeHDFSFS(); if (attributes.has_value()) throw Exception( ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); + std::string path = object.remote_path; + if (path.starts_with("/")) + path = path.substr(1); + if (!path.starts_with(url)) + path = fs::path(url) / path; + /// Single O_WRONLY in libhdfs adds O_TRUNC return std::make_unique( - object.remote_path, config, settings->replication, patchSettings(write_settings), buf_size, + path, config, settings->replication, patchSettings(write_settings), buf_size, mode == WriteMode::Rewrite ? O_WRONLY : O_WRONLY | O_APPEND); } @@ -97,11 +134,13 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL /// Remove file. Throws exception if file doesn't exist or it's a directory. 
void HDFSObjectStorage::removeObject(const StoredObject & object) { - const auto & path = object.remote_path; - const size_t begin_of_path = path.find('/', path.find("//") + 2); + initializeHDFSFS(); + auto path = object.remote_path; + if (path.starts_with(url_without_path)) + path = path.substr(url_without_path.size()); /// Add path from root to file name - int res = hdfsDelete(hdfs_fs.get(), path.substr(begin_of_path).c_str(), 0); + int res = hdfsDelete(hdfs_fs.get(), path.c_str(), 0); if (res == -1) throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: {}", path); @@ -109,27 +148,85 @@ void HDFSObjectStorage::removeObject(const StoredObject & object) void HDFSObjectStorage::removeObjects(const StoredObjects & objects) { + initializeHDFSFS(); for (const auto & object : objects) removeObject(object); } void HDFSObjectStorage::removeObjectIfExists(const StoredObject & object) { + initializeHDFSFS(); if (exists(object)) removeObject(object); } void HDFSObjectStorage::removeObjectsIfExist(const StoredObjects & objects) { + initializeHDFSFS(); for (const auto & object : objects) removeObjectIfExists(object); } -ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string &) const +ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) const { - throw Exception( - ErrorCodes::UNSUPPORTED_METHOD, - "HDFS API doesn't support custom attributes/metadata for stored objects"); + initializeHDFSFS(); + auto * file_info = hdfsGetPathInfo(hdfs_fs.get(), path.data()); + if (!file_info) + throw Exception(ErrorCodes::HDFS_ERROR, + "Cannot get file info for: {}. Error: {}", path, hdfsGetLastError()); + + ObjectMetadata metadata; + metadata.size_bytes = static_cast(file_info->mSize); + metadata.last_modified = Poco::Timestamp::fromEpochTime(file_info->mLastMod); + + hdfsFreeFileInfo(file_info, 1); + return metadata; +} + +void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const +{ + initializeHDFSFS(); + LOG_TEST(log, "Trying to list files for {}", path); + + HDFSFileInfo ls; + ls.file_info = hdfsListDirectory(hdfs_fs.get(), path.data(), &ls.length); + + if (ls.file_info == nullptr && errno != ENOENT) // NOLINT + { + // Ignore the file-not-found error but rethrow other exceptions; + // libhdfs3 doesn't have a function to get the exception type, so use errno. 
+ throw Exception(ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", + path, String(hdfsGetLastError())); + } + + if (!ls.file_info && ls.length > 0) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); + } + + LOG_TEST(log, "Listed {} files for {}", ls.length, path); + + for (int i = 0; i < ls.length; ++i) + { + const String file_path = fs::path(ls.file_info[i].mName).lexically_normal(); + const bool is_directory = ls.file_info[i].mKind == 'D'; + if (is_directory) + { + listObjects(fs::path(file_path) / "", children, max_keys); + } + else + { + children.emplace_back(std::make_shared( + String(file_path), + ObjectMetadata{ + static_cast(ls.file_info[i].mSize), + Poco::Timestamp::fromEpochTime(ls.file_info[i].mLastMod), + {}})); + } + + if (max_keys && children.size() >= max_keys) + break; + } } void HDFSObjectStorage::copyObject( /// NOLINT @@ -139,6 +236,7 @@ void HDFSObjectStorage::copyObject( /// NOLINT const WriteSettings & write_settings, std::optional object_to_attributes) { + initializeHDFSFS(); if (object_to_attributes.has_value()) throw Exception( ErrorCodes::UNSUPPORTED_METHOD, @@ -151,7 +249,10 @@ void HDFSObjectStorage::copyObject( /// NOLINT } -std::unique_ptr HDFSObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration &, const std::string &, ContextPtr) +std::unique_ptr HDFSObjectStorage::cloneObjectStorage( + const std::string &, + const Poco::Util::AbstractConfiguration &, + const std::string &, ContextPtr) { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS object storage doesn't support cloning"); } diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 66095eb9f8f..8aae90d0721 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include @@ -16,21 +16,13 @@ namespace DB struct HDFSObjectStorageSettings { - - HDFSObjectStorageSettings() = default; - - size_t min_bytes_for_seek; - int objects_chunk_size_to_delete; - int replication; - - HDFSObjectStorageSettings( - int min_bytes_for_seek_, - int objects_chunk_size_to_delete_, - int replication_) + HDFSObjectStorageSettings(int min_bytes_for_seek_, int replication_) : min_bytes_for_seek(min_bytes_for_seek_) - , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) , replication(replication_) {} + + size_t min_bytes_for_seek; + int replication; }; @@ -43,20 +35,29 @@ public: HDFSObjectStorage( const String & hdfs_root_path_, SettingsPtr settings_, - const Poco::Util::AbstractConfiguration & config_) + const Poco::Util::AbstractConfiguration & config_, + bool lazy_initialize) : config(config_) - , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) - , hdfs_fs(createHDFSFS(hdfs_builder.get())) , settings(std::move(settings_)) - , hdfs_root_path(hdfs_root_path_) + , log(getLogger("HDFSObjectStorage(" + hdfs_root_path_ + ")")) { + const size_t begin_of_path = hdfs_root_path_.find('/', hdfs_root_path_.find("//") + 2); + url = hdfs_root_path_; + url_without_path = url.substr(0, begin_of_path); + if (begin_of_path < url.size()) + data_directory = url.substr(begin_of_path); + else + data_directory = "/"; + + if (!lazy_initialize) + initializeHDFSFS(); } std::string getName() const override { return "HDFSObjectStorage"; } - std::string getCommonKeyPrefix() const override { return hdfs_root_path; } + std::string getCommonKeyPrefix() const 
override { return url; } - std::string getDescription() const override { return hdfs_root_path; } + std::string getDescription() const override { return url; } ObjectStorageType getType() const override { return ObjectStorageType::HDFS; } @@ -100,9 +101,7 @@ public: const WriteSettings & write_settings, std::optional object_to_attributes = {}) override; - void shutdown() override; - - void startup() override; + void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override; String getObjectsNamespace() const override { return ""; } @@ -116,13 +115,28 @@ public: bool isRemote() const override { return true; } + void startup() override { } + + void shutdown() override { } + private: + void initializeHDFSFS() const; + std::string extractObjectKeyFromURL(const StoredObject & object) const; + const Poco::Util::AbstractConfiguration & config; - HDFSBuilderWrapper hdfs_builder; - HDFSFSPtr hdfs_fs; + mutable HDFSBuilderWrapper hdfs_builder; + mutable HDFSFSPtr hdfs_fs; + + mutable std::mutex init_mutex; + mutable std::atomic_bool initialized{false}; + SettingsPtr settings; - const std::string hdfs_root_path; + std::string url; + std::string url_without_path; + std::string data_directory; + + LoggerPtr log; }; } diff --git a/src/Disks/ObjectStorages/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp index accef9a08ab..fd1269df79b 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.cpp +++ b/src/Disks/ObjectStorages/IObjectStorage.cpp @@ -25,16 +25,16 @@ bool IObjectStorage::existsOrHasAnyChild(const std::string & path) const return !files.empty(); } -void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata &, int) const +void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata &, size_t) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "listObjects() is not supported"); } -ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix) const +ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const { RelativePathsWithMetadata files; - listObjects(path_prefix, files, 0); + listObjects(path_prefix, files, max_keys); return std::make_shared(std::move(files)); } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index eae31af9d44..b49dc839561 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -1,10 +1,10 @@ #pragma once -#include #include #include #include #include +#include #include #include @@ -31,12 +31,17 @@ #include #endif +#if USE_AWS_S3 +#include +#endif + namespace DB { namespace ErrorCodes { extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; } class ReadBufferFromFileBase; @@ -47,21 +52,28 @@ using ObjectAttributes = std::map; struct ObjectMetadata { uint64_t size_bytes = 0; - std::optional last_modified; - std::optional attributes; + Poco::Timestamp last_modified; + ObjectAttributes attributes; }; struct RelativePathWithMetadata { String relative_path; - ObjectMetadata metadata; + std::optional metadata; RelativePathWithMetadata() = default; - RelativePathWithMetadata(String relative_path_, ObjectMetadata metadata_) + explicit RelativePathWithMetadata(String relative_path_, std::optional metadata_ = std::nullopt) : relative_path(std::move(relative_path_)) , metadata(std::move(metadata_)) {} + + virtual ~RelativePathWithMetadata() = default; + + virtual std::string getFileName() const { return 
std::filesystem::path(relative_path).filename(); } + virtual std::string getPath() const { return relative_path; } + virtual bool isArchive() const { return false; } + virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } }; struct ObjectKeyWithMetadata @@ -77,7 +89,8 @@ struct ObjectKeyWithMetadata {} }; -using RelativePathsWithMetadata = std::vector; +using RelativePathWithMetadataPtr = std::shared_ptr; +using RelativePathsWithMetadata = std::vector; using ObjectKeysWithMetadata = std::vector; class IObjectStorageIterator; @@ -111,9 +124,9 @@ public: /// /, /a, /a/b, /a/b/c, /a/b/c/d while exists will return true only for /a/b/c/d virtual bool existsOrHasAnyChild(const std::string & path) const; - virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const; + virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const; - virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const; + virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const; /// Get object metadata if supported. It should be possible to receive /// at least size of object @@ -190,11 +203,15 @@ public: virtual void startup() = 0; /// Apply new settings, in most cases reinitialize client and some other stuff + struct ApplyNewSettingsOptions + { + bool allow_client_change = true; + }; virtual void applyNewSettings( - const Poco::Util::AbstractConfiguration &, + const Poco::Util::AbstractConfiguration & /* config */, const std::string & /*config_prefix*/, - ContextPtr) - {} + ContextPtr /* context */, + const ApplyNewSettingsOptions & /* options */) {} /// Sometimes object storages have something similar to chroot or namespace, for example /// buckets in S3. If object storage doesn't have any namespaces return empty string. 
@@ -244,6 +261,13 @@ public: } #endif +#if USE_AWS_S3 + virtual std::shared_ptr getS3StorageClient() + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for S3ObjectStorage"); + } +#endif + private: mutable std::mutex throttlers_mutex; diff --git a/src/Disks/ObjectStorages/IObjectStorage_fwd.h b/src/Disks/ObjectStorages/IObjectStorage_fwd.h index f6ebc883682..67efa4aae2b 100644 --- a/src/Disks/ObjectStorages/IObjectStorage_fwd.h +++ b/src/Disks/ObjectStorages/IObjectStorage_fwd.h @@ -10,4 +10,7 @@ using ObjectStoragePtr = std::shared_ptr; class IMetadataStorage; using MetadataStoragePtr = std::shared_ptr; +class IObjectStorageIterator; +using ObjectStorageIteratorPtr = std::shared_ptr; + } diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index d44e17a0713..a247d86ddce 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -172,7 +172,7 @@ ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & path) c return object_metadata; } -void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int /* max_keys */) const +void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t/* max_keys */) const { for (const auto & entry : fs::directory_iterator(path)) { @@ -182,8 +182,7 @@ void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWith continue; } - auto metadata = getObjectMetadata(entry.path()); - children.emplace_back(entry.path(), std::move(metadata)); + children.emplace_back(std::make_shared(entry.path(), getObjectMetadata(entry.path()))); } } @@ -223,11 +222,6 @@ std::unique_ptr LocalObjectStorage::cloneObjectStorage( throw Exception(ErrorCodes::NOT_IMPLEMENTED, "cloneObjectStorage() is not implemented for LocalObjectStorage"); } -void LocalObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & /* config */, const std::string & /* config_prefix */, ContextPtr /* context */) -{ -} - ObjectStorageKey LocalObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const { constexpr size_t key_name_total_size = 32; diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h index 22429a99c76..371cd37f8b2 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h @@ -58,7 +58,7 @@ public: ObjectMetadata getObjectMetadata(const std::string & path) const override; - void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override; + void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override; bool existsOrHasAnyChild(const std::string & path) const override; @@ -73,11 +73,6 @@ public: void startup() override; - void applyNewSettings( - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - ContextPtr context) override; - String getObjectsNamespace() const override { return ""; } std::unique_ptr cloneObjectStorage( diff --git a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp index 4a3e8a37d28..ab7c2069b43 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp @@ -99,8 +99,10 @@ void 
registerMetadataStorageFromDisk(MetadataStorageFactory & factory) { auto metadata_path = config.getString(config_prefix + ".metadata_path", fs::path(Context::getGlobalContextInstance()->getPath()) / "disks" / name / ""); + auto metadata_keep_free_space_bytes = config.getUInt64(config_prefix + ".metadata_keep_free_space_bytes", 0); + fs::create_directories(metadata_path); - auto metadata_disk = std::make_shared(name + "-metadata", metadata_path, 0, config, config_prefix); + auto metadata_disk = std::make_shared(name + "-metadata", metadata_path, metadata_keep_free_space_bytes, config, config_prefix); auto key_compatibility_prefix = getObjectKeyCompatiblePrefix(*object_storage, config, config_prefix); return std::make_shared(metadata_disk, key_compatibility_prefix); }); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index faa7ca38b75..30111d04d20 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -105,7 +105,7 @@ std::vector MetadataStorageFromPlainObjectStorage::getDirectChildre std::unordered_set duplicates_filter; for (const auto & elem : remote_paths) { - const auto & path = elem.relative_path; + const auto & path = elem->relative_path; chassert(path.find(storage_key) == 0); const auto child_pos = storage_key.size(); /// string::npos is ok. diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index d910dae80b4..3e772271b99 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -26,11 +26,11 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri object_storage->listObjects(root, files, 0); for (const auto & file : files) { - auto remote_path = std::filesystem::path(file.relative_path); + auto remote_path = std::filesystem::path(file->relative_path); if (remote_path.filename() != PREFIX_PATH_FILE_NAME) continue; - StoredObject object{file.relative_path}; + StoredObject object{file->relative_path}; auto read_buf = object_storage->readObject(object); String local_path; @@ -88,7 +88,7 @@ std::vector getDirectChildrenOnRewritableDisk( auto skip_list = std::set{PREFIX_PATH_FILE_NAME}; for (const auto & elem : remote_paths) { - const auto & path = elem.relative_path; + const auto & path = elem->relative_path; chassert(path.find(storage_key) == 0); const auto child_pos = storage_key.size(); diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index c83b9247b99..d7884c2911b 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -9,7 +9,7 @@ #endif #if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) #include -#include +#include #endif #if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) #include @@ -183,7 +183,7 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory) auto uri = getS3URI(config, config_prefix, context); auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); auto settings = getSettings(config, config_prefix, context); - auto client = getClient(config, config_prefix, context, *settings); + auto client = getClient(config, config_prefix, 
context, *settings, true); auto key_generator = getKeyGenerator(uri, config, config_prefix); auto object_storage = createObjectStorage( @@ -219,7 +219,7 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory) auto uri = getS3URI(config, config_prefix, context); auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); auto settings = getSettings(config, config_prefix, context); - auto client = getClient(config, config_prefix, context, *settings); + auto client = getClient(config, config_prefix, context, *settings, true); auto key_generator = getKeyGenerator(uri, config, config_prefix); auto object_storage = std::make_shared>( @@ -253,7 +253,7 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory) auto uri = getS3URI(config, config_prefix, context); auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); auto settings = getSettings(config, config_prefix, context); - auto client = getClient(config, config_prefix, context, *settings); + auto client = getClient(config, config_prefix, context, *settings, true); auto key_generator = getKeyGenerator(uri, config, config_prefix); auto object_storage = std::make_shared>( @@ -287,10 +287,9 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) std::unique_ptr settings = std::make_unique( config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), context->getSettingsRef().hdfs_replication); - return createObjectStorage(ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config); + return createObjectStorage(ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config, /* lazy_initialize */false); }); } #endif diff --git a/src/Disks/ObjectStorages/ObjectStorageIterator.cpp b/src/Disks/ObjectStorages/ObjectStorageIterator.cpp index 72ec6e0e500..3d939ce9230 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIterator.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIterator.cpp @@ -9,7 +9,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -RelativePathWithMetadata ObjectStorageIteratorFromList::current() +RelativePathWithMetadataPtr ObjectStorageIteratorFromList::current() { if (!isValid()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator"); diff --git a/src/Disks/ObjectStorages/ObjectStorageIterator.h b/src/Disks/ObjectStorages/ObjectStorageIterator.h index 9af2593579a..26c3c690ba5 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIterator.h +++ b/src/Disks/ObjectStorages/ObjectStorageIterator.h @@ -12,9 +12,9 @@ public: virtual void next() = 0; virtual void nextBatch() = 0; virtual bool isValid() = 0; - virtual RelativePathWithMetadata current() = 0; + virtual RelativePathWithMetadataPtr current() = 0; virtual RelativePathsWithMetadata currentBatch() = 0; - virtual std::optional getCurrrentBatchAndScheduleNext() = 0; + virtual std::optional getCurrentBatchAndScheduleNext() = 0; virtual size_t getAccumulatedSize() const = 0; virtual ~IObjectStorageIterator() = default; @@ -27,9 +27,7 @@ class ObjectStorageIteratorFromList : public IObjectStorageIterator public: explicit ObjectStorageIteratorFromList(RelativePathsWithMetadata && batch_) : batch(std::move(batch_)) - , batch_iterator(batch.begin()) - { - } + , batch_iterator(batch.begin()) {} void next() override { @@ -37,32 +35,26 @@ public: ++batch_iterator; } - void nextBatch() override + void nextBatch() override { batch_iterator = batch.end(); } + + bool isValid() 
override { return batch_iterator != batch.end(); } + + RelativePathWithMetadataPtr current() override; + + RelativePathsWithMetadata currentBatch() override { return batch; } + + std::optional getCurrentBatchAndScheduleNext() override { - batch_iterator = batch.end(); + if (batch.empty()) + return {}; + + auto current_batch = std::move(batch); + batch = {}; + return current_batch; } - bool isValid() override - { - return batch_iterator != batch.end(); - } + size_t getAccumulatedSize() const override { return batch.size(); } - RelativePathWithMetadata current() override; - - RelativePathsWithMetadata currentBatch() override - { - return batch; - } - - std::optional getCurrrentBatchAndScheduleNext() override - { - return std::nullopt; - } - - size_t getAccumulatedSize() const override - { - return batch.size(); - } private: RelativePathsWithMetadata batch; RelativePathsWithMetadata::iterator batch_iterator; diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp index 990e66fc4e5..0420de0f8dd 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp @@ -11,10 +11,37 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +IObjectStorageIteratorAsync::IObjectStorageIteratorAsync( + CurrentMetrics::Metric threads_metric, + CurrentMetrics::Metric threads_active_metric, + CurrentMetrics::Metric threads_scheduled_metric, + const std::string & thread_name) + : list_objects_pool(threads_metric, threads_active_metric, threads_scheduled_metric, 1) + , list_objects_scheduler(threadPoolCallbackRunnerUnsafe(list_objects_pool, thread_name)) +{ +} + +IObjectStorageIteratorAsync::~IObjectStorageIteratorAsync() +{ + if (!deactivated) + deactivate(); +} + +void IObjectStorageIteratorAsync::deactivate() +{ + list_objects_pool.wait(); + deactivated = true; +} + void IObjectStorageIteratorAsync::nextBatch() { std::lock_guard lock(mutex); - if (!is_finished) + if (is_finished) + { + current_batch.clear(); + current_batch_iterator = current_batch.begin(); + } + else { if (!is_initialized) { @@ -22,19 +49,27 @@ void IObjectStorageIteratorAsync::nextBatch() is_initialized = true; } - BatchAndHasNext next_batch = outcome_future.get(); - current_batch = std::move(next_batch.batch); - accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed); - current_batch_iterator = current_batch.begin(); - if (next_batch.has_next) - outcome_future = scheduleBatch(); - else - is_finished = true; - } - else - { - current_batch.clear(); + chassert(outcome_future.valid()); + BatchAndHasNext result; + try + { + result = outcome_future.get(); + } + catch (...) 
+ { + is_finished = true; + throw; + } + + current_batch = std::move(result.batch); current_batch_iterator = current_batch.begin(); + + accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed); + + if (result.has_next) + outcome_future = scheduleBatch(); + else + is_finished = true; } } @@ -42,24 +77,10 @@ void IObjectStorageIteratorAsync::next() { std::lock_guard lock(mutex); - if (current_batch_iterator != current_batch.end()) - { + if (current_batch_iterator == current_batch.end()) + nextBatch(); + else ++current_batch_iterator; - } - else if (!is_finished) - { - if (outcome_future.valid()) - { - BatchAndHasNext next_batch = outcome_future.get(); - current_batch = std::move(next_batch.batch); - accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed); - current_batch_iterator = current_batch.begin(); - if (next_batch.has_next) - outcome_future = scheduleBatch(); - else - is_finished = true; - } - } } std::future IObjectStorageIteratorAsync::scheduleBatch() @@ -72,7 +93,6 @@ std::future IObjectStorageIterator }, Priority{}); } - bool IObjectStorageIteratorAsync::isValid() { if (!is_initialized) @@ -82,7 +102,7 @@ bool IObjectStorageIteratorAsync::isValid() return current_batch_iterator != current_batch.end(); } -RelativePathWithMetadata IObjectStorageIteratorAsync::current() +RelativePathWithMetadataPtr IObjectStorageIteratorAsync::current() { if (!isValid()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator"); @@ -101,20 +121,20 @@ RelativePathsWithMetadata IObjectStorageIteratorAsync::currentBatch() return current_batch; } -std::optional IObjectStorageIteratorAsync::getCurrrentBatchAndScheduleNext() +std::optional IObjectStorageIteratorAsync::getCurrentBatchAndScheduleNext() { std::lock_guard lock(mutex); if (!is_initialized) nextBatch(); - if (current_batch_iterator != current_batch.end()) + if (current_batch_iterator == current_batch.end()) { - auto temp_current_batch = current_batch; - nextBatch(); - return temp_current_batch; + return std::nullopt; } - return std::nullopt; + auto temp_current_batch = std::move(current_batch); + nextBatch(); + return temp_current_batch; } size_t IObjectStorageIteratorAsync::getAccumulatedSize() const diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h index 7fdb02bdfe2..cb4818d01ae 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h @@ -17,24 +17,22 @@ public: CurrentMetrics::Metric threads_metric, CurrentMetrics::Metric threads_active_metric, CurrentMetrics::Metric threads_scheduled_metric, - const std::string & thread_name) - : list_objects_pool(threads_metric, threads_active_metric, threads_scheduled_metric, 1) - , list_objects_scheduler(threadPoolCallbackRunnerUnsafe(list_objects_pool, thread_name)) - { - } + const std::string & thread_name); + + ~IObjectStorageIteratorAsync() override; + + bool isValid() override; + + RelativePathWithMetadataPtr current() override; + RelativePathsWithMetadata currentBatch() override; void next() override; void nextBatch() override; - bool isValid() override; - RelativePathWithMetadata current() override; - RelativePathsWithMetadata currentBatch() override; - size_t getAccumulatedSize() const override; - std::optional getCurrrentBatchAndScheduleNext() override; - ~IObjectStorageIteratorAsync() override - { - list_objects_pool.wait(); - } + size_t getAccumulatedSize() const override; + std::optional 
getCurrentBatchAndScheduleNext() override; + + void deactivate(); protected: @@ -50,6 +48,7 @@ protected: bool is_initialized{false}; bool is_finished{false}; + bool deactivated{false}; mutable std::recursive_mutex mutex; ThreadPool list_objects_pool; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index adbdd9d13aa..7694337dc55 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -61,7 +61,10 @@ void throwIfError(const Aws::Utils::Outcome & response) if (!response.IsSuccess()) { const auto & err = response.GetError(); - throw S3Exception(fmt::format("{} (Code: {})", err.GetMessage(), static_cast(err.GetErrorType())), err.GetErrorType()); + throw S3Exception( + fmt::format("{} (Code: {}, s3 exception: {})", + err.GetMessage(), static_cast(err.GetErrorType()), err.GetExceptionName()), + err.GetErrorType()); } } @@ -111,10 +114,19 @@ public: CurrentMetrics::ObjectStorageS3ThreadsScheduled, "ListObjectS3") , client(client_) + , request(std::make_unique()) { - request.SetBucket(bucket_); - request.SetPrefix(path_prefix); - request.SetMaxKeys(static_cast(max_list_size)); + request->SetBucket(bucket_); + request->SetPrefix(path_prefix); + request->SetMaxKeys(static_cast(max_list_size)); + } + + ~S3IteratorAsync() override + { + /// Deactivate background threads before resetting the request to avoid data race. + deactivate(); + request.reset(); + client.reset(); } private: @@ -123,34 +135,32 @@ private: ProfileEvents::increment(ProfileEvents::S3ListObjects); ProfileEvents::increment(ProfileEvents::DiskS3ListObjects); - bool result = false; - auto outcome = client->ListObjectsV2(request); + auto outcome = client->ListObjectsV2(*request); + /// Outcome failure will be handled on the caller side. 
if (outcome.IsSuccess()) { + request->SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); + auto objects = outcome.GetResult().GetContents(); - - result = !objects.empty(); - for (const auto & object : objects) - batch.emplace_back( - object.GetKey(), - ObjectMetadata{static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}} - ); + { + ObjectMetadata metadata{static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}}; + batch.emplace_back(std::make_shared(object.GetKey(), std::move(metadata))); + } - if (result) - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - - return result; + /// It returns false when all objects were returned + return outcome.GetResult().GetIsTruncated(); } - throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", - quoteString(request.GetBucket()), quoteString(request.GetPrefix()), - backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); + throw S3Exception(outcome.GetError().GetErrorType(), + "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", + quoteString(request->GetBucket()), quoteString(request->GetPrefix()), + backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); } std::shared_ptr client; - S3::ListObjectsV2Request request; + std::unique_ptr request; }; } @@ -248,12 +258,16 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN if (mode != WriteMode::Rewrite) throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 doesn't support append to files"); - auto settings_ptr = s3_settings.get(); + S3Settings::RequestSettings request_settings = s3_settings.get()->request_settings; + if (auto query_context = CurrentThread::getQueryContext()) + { + request_settings.updateFromSettingsIfChanged(query_context->getSettingsRef()); + } + ThreadPoolCallbackRunnerUnsafe scheduler; if (write_settings.s3_allow_parallel_part_upload) scheduler = threadPoolCallbackRunnerUnsafe(getThreadPoolWriter(), "VFSWrite"); - auto blob_storage_log = BlobStorageLogWriter::create(disk_name); if (blob_storage_log) blob_storage_log->local_path = object.local_path; @@ -263,7 +277,7 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN uri.bucket, object.remote_path, buf_size, - settings_ptr->request_settings, + request_settings, std::move(blob_storage_log), attributes, std::move(scheduler), @@ -271,13 +285,13 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN } -ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix) const +ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const { auto settings_ptr = s3_settings.get(); - return std::make_shared(uri.bucket, path_prefix, client.get(), settings_ptr->list_object_keys_size); + return std::make_shared(uri.bucket, path_prefix, client.get(), max_keys); } -void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const +void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const { auto settings_ptr = s3_settings.get(); @@ -285,7 +299,7 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet request.SetBucket(uri.bucket); request.SetPrefix(path); if (max_keys) - request.SetMaxKeys(max_keys); + 
request.SetMaxKeys(static_cast(max_keys)); else request.SetMaxKeys(settings_ptr->list_object_keys_size); @@ -305,19 +319,19 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet break; for (const auto & object : objects) - children.emplace_back( + children.emplace_back(std::make_shared( object.GetKey(), ObjectMetadata{ static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), - {}}); + {}})); if (max_keys) { - int keys_left = max_keys - static_cast(children.size()); + size_t keys_left = max_keys - children.size(); if (keys_left <= 0) break; - request.SetMaxKeys(keys_left); + request.SetMaxKeys(static_cast(keys_left)); } request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); @@ -425,14 +439,16 @@ void S3ObjectStorage::removeObjectsIfExist(const StoredObjects & objects) std::optional S3ObjectStorage::tryGetObjectMetadata(const std::string & path) const { auto settings_ptr = s3_settings.get(); - auto object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* throw_on_error= */ false); + auto object_info = S3::getObjectInfo( + *client.get(), uri.bucket, path, {}, settings_ptr->request_settings, + /* with_metadata= */ true, /* throw_on_error= */ false); if (object_info.size == 0 && object_info.last_modification_time == 0 && object_info.metadata.empty()) return {}; ObjectMetadata result; result.size_bytes = object_info.size; - result.last_modified = object_info.last_modification_time; + result.last_modified = Poco::Timestamp::fromEpochTime(object_info.last_modification_time); result.attributes = object_info.metadata; return result; @@ -441,11 +457,20 @@ std::optional S3ObjectStorage::tryGetObjectMetadata(const std::s ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) const { auto settings_ptr = s3_settings.get(); - auto object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true); + S3::ObjectInfo object_info; + try + { + object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true); + } + catch (DB::Exception & e) + { + e.addMessage("while reading " + path); + throw; + } ObjectMetadata result; result.size_bytes = object_info.size; - result.last_modified = object_info.last_modification_time; + result.last_modified = Poco::Timestamp::fromEpochTime(object_info.last_modification_time); result.attributes = object_info.metadata; return result; @@ -470,13 +495,14 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT try { copyS3File( - current_client, - uri.bucket, - object_from.remote_path, - 0, - size, - dest_s3->uri.bucket, - object_to.remote_path, + /*src_s3_client=*/current_client, + /*src_bucket=*/uri.bucket, + /*src_key=*/object_from.remote_path, + /*src_offset=*/0, + /*src_size=*/size, + /*dest_s3_client=*/current_client, + /*dest_bucket=*/dest_s3->uri.bucket, + /*dest_key=*/object_to.remote_path, settings_ptr->request_settings, patchSettings(read_settings), BlobStorageLogWriter::create(disk_name), @@ -510,13 +536,15 @@ void S3ObjectStorage::copyObject( // NOLINT auto size = S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {}, settings_ptr->request_settings); auto scheduler = threadPoolCallbackRunnerUnsafe(getThreadPoolWriter(), "S3ObjStor_copy"); - copyS3File(current_client, - uri.bucket, - object_from.remote_path, - 0, - size, - 
uri.bucket, - object_to.remote_path, + copyS3File( + /*src_s3_client=*/current_client, + /*src_bucket=*/uri.bucket, + /*src_key=*/object_from.remote_path, + /*src_offset=*/0, + /*src_size=*/size, + /*dest_s3_client=*/current_client, + /*dest_bucket=*/uri.bucket, + /*dest_key=*/object_to.remote_path, settings_ptr->request_settings, patchSettings(read_settings), BlobStorageLogWriter::create(disk_name), @@ -544,19 +572,38 @@ void S3ObjectStorage::startup() const_cast(*client.get()).EnableRequestProcessing(); } -void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +void S3ObjectStorage::applyNewSettings( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context, + const ApplyNewSettingsOptions & options) { - auto new_s3_settings = getSettings(config, config_prefix, context); - auto new_client = getClient(config, config_prefix, context, *new_s3_settings); - s3_settings.set(std::move(new_s3_settings)); - client.set(std::move(new_client)); + auto settings_from_config = getSettings(config, config_prefix, context, context->getSettingsRef().s3_validate_request_settings); + auto modified_settings = std::make_unique(*s3_settings.get()); + modified_settings->auth_settings.updateFrom(settings_from_config->auth_settings); + modified_settings->request_settings = settings_from_config->request_settings; + + if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString(), context->getUserName())) + modified_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); + + auto current_settings = s3_settings.get(); + if (options.allow_client_change + && (current_settings->auth_settings.hasUpdates(modified_settings->auth_settings) || for_disk_s3)) + { + auto new_client = getClient(config, config_prefix, context, *modified_settings, for_disk_s3, &uri); + client.set(std::move(new_client)); + } + s3_settings.set(std::move(modified_settings)); } std::unique_ptr S3ObjectStorage::cloneObjectStorage( - const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) + const std::string & new_namespace, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) { auto new_s3_settings = getSettings(config, config_prefix, context); - auto new_client = getClient(config, config_prefix, context, *new_s3_settings); + auto new_client = getClient(config, config_prefix, context, *new_s3_settings, true); auto new_uri{uri}; new_uri.bucket = new_namespace; @@ -573,6 +620,11 @@ ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & p return key_generator->generate(path, /* is_directory */ false); } +std::shared_ptr S3ObjectStorage::getS3StorageClient() +{ + return client.get(); +} + } #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 5eaab4b585c..6eacf3a1eee 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -21,11 +21,13 @@ struct S3ObjectStorageSettings S3ObjectStorageSettings( const S3Settings::RequestSettings & request_settings_, + const S3::AuthSettings & auth_settings_, uint64_t min_bytes_for_seek_, int32_t list_object_keys_size_, int32_t objects_chunk_size_to_delete_, bool read_only_) : request_settings(request_settings_) + , auth_settings(auth_settings_) , 
min_bytes_for_seek(min_bytes_for_seek_) , list_object_keys_size(list_object_keys_size_) , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) @@ -33,6 +35,7 @@ struct S3ObjectStorageSettings {} S3Settings::RequestSettings request_settings; + S3::AuthSettings auth_settings; uint64_t min_bytes_for_seek; int32_t list_object_keys_size; @@ -50,7 +53,8 @@ private: S3::URI uri_, const S3Capabilities & s3_capabilities_, ObjectStorageKeysGeneratorPtr key_generator_, - const String & disk_name_) + const String & disk_name_, + bool for_disk_s3_ = true) : uri(uri_) , disk_name(disk_name_) , client(std::move(client_)) @@ -58,6 +62,7 @@ private: , s3_capabilities(s3_capabilities_) , key_generator(std::move(key_generator_)) , log(getLogger(logger_name)) + , for_disk_s3(for_disk_s3_) { } @@ -98,9 +103,9 @@ public: size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, const WriteSettings & write_settings = {}) override; - void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override; + void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override; - ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const override; + ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override; /// Uses `DeleteObjectRequest`. void removeObject(const StoredObject & object) override; @@ -142,7 +147,8 @@ public: void applyNewSettings( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - ContextPtr context) override; + ContextPtr context, + const ApplyNewSettingsOptions & options) override; std::string getObjectsNamespace() const override { return uri.bucket; } @@ -162,6 +168,7 @@ public: bool isReadOnly() const override { return s3_settings.get()->read_only; } + std::shared_ptr getS3StorageClient() override; private: void setNewSettings(std::unique_ptr && s3_settings_); @@ -179,6 +186,8 @@ private: ObjectStorageKeysGeneratorPtr key_generator; LoggerPtr log; + + const bool for_disk_s3; }; } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 35913613326..139472a8b01 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -25,19 +25,29 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} namespace ErrorCodes { extern const int NO_ELEMENTS_IN_CONFIG; } -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +std::unique_ptr getSettings( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context, + bool validate_settings) { const Settings & settings = context->getSettingsRef(); - S3Settings::RequestSettings request_settings(config, config_prefix, settings, "s3_"); + auto request_settings = S3Settings::RequestSettings(config, config_prefix, settings, "s3_", validate_settings); + auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config); return std::make_unique( request_settings, + auth_settings, config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getInt(config_prefix + ".list_object_keys_size", 1000), config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), @@ -48,82 +58,99 @@ std::unique_ptr getClient( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, - const S3ObjectStorageSettings & 
settings) + const S3ObjectStorageSettings & settings, + bool for_disk_s3, + const S3::URI * url_) { const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); const Settings & local_settings = context->getSettingsRef(); - const String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); - S3::URI uri(endpoint); - if (!uri.key.ends_with('/')) - uri.key.push_back('/'); + const auto & auth_settings = settings.auth_settings; + const auto & request_settings = settings.request_settings; - if (S3::isS3ExpressEndpoint(endpoint) && !config.has(config_prefix + ".region")) + S3::URI url; + if (for_disk_s3) + { + String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); + url = S3::URI(endpoint); + if (!url.key.ends_with('/')) + url.key.push_back('/'); + } + else + { + if (!url_) + throw Exception(ErrorCodes::LOGICAL_ERROR, "URL not passed"); + url = *url_; + } + const bool is_s3_express_bucket = S3::isS3ExpressEndpoint(url.endpoint); + if (is_s3_express_bucket && !config.has(config_prefix + ".region")) + { throw Exception( ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets ({})", config_prefix); + } S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( - config.getString(config_prefix + ".region", ""), + auth_settings.region, context->getRemoteHostFilter(), static_cast(global_settings.s3_max_redirects), static_cast(global_settings.s3_retry_attempts), global_settings.enable_s3_requests_logging, - /* for_disk_s3 = */ true, + for_disk_s3, settings.request_settings.get_request_throttler, settings.request_settings.put_request_throttler, - uri.uri.getScheme()); + url.uri.getScheme()); - client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", S3::DEFAULT_CONNECT_TIMEOUT_MS); - client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", S3::DEFAULT_REQUEST_TIMEOUT_MS); - client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS); + client_configuration.connectTimeoutMs = config.getUInt64(config_prefix + ".connect_timeout_ms", local_settings.s3_connect_timeout_ms.value); + client_configuration.requestTimeoutMs = config.getUInt64(config_prefix + ".request_timeout_ms", local_settings.s3_request_timeout_ms.value); + client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", static_cast(request_settings.max_connections)); client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", S3::DEFAULT_KEEP_ALIVE_TIMEOUT); client_configuration.http_keep_alive_max_requests = config.getUInt(config_prefix + ".http_keep_alive_max_requests", S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS); - client_configuration.endpointOverride = uri.endpoint; + client_configuration.endpointOverride = url.endpoint; client_configuration.s3_use_adaptive_timeouts = config.getBool( config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts); - /* - * Override proxy configuration for backwards compatibility with old configuration format. 
- * */ - auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat( - ProxyConfiguration::protocolFromString(uri.uri.getScheme()), - config_prefix, - config - ); - if (proxy_config) + if (for_disk_s3) { - client_configuration.per_request_configuration - = [proxy_config]() { return proxy_config->resolve(); }; - client_configuration.error_report - = [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); }; + /* + * Override proxy configuration for backwards compatibility with old configuration format. + * */ + if (auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat( + ProxyConfiguration::protocolFromString(url.uri.getScheme()), config_prefix, config)) + { + client_configuration.per_request_configuration + = [proxy_config]() { return proxy_config->resolve(); }; + client_configuration.error_report + = [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); }; + } } - HTTPHeaderEntries headers = S3::getHTTPHeaders(config_prefix, config); S3::ServerSideEncryptionKMSConfig sse_kms_config = S3::getSSEKMSConfig(config_prefix, config); - S3::ClientSettings client_settings{ - .use_virtual_addressing = uri.is_virtual_hosted_style, + .use_virtual_addressing = url.is_virtual_hosted_style, .disable_checksum = local_settings.s3_disable_checksum, .gcs_issue_compose_request = config.getBool("s3.gcs_issue_compose_request", false), - .is_s3express_bucket = S3::isS3ExpressEndpoint(endpoint), + .is_s3express_bucket = is_s3_express_bucket, + }; + + auto credentials_configuration = S3::CredentialsConfiguration + { + auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)), + auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), + auth_settings.expiration_window_seconds.value_or(context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), + auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), }; return S3::ClientFactory::instance().create( client_configuration, client_settings, - config.getString(config_prefix + ".access_key_id", ""), - config.getString(config_prefix + ".secret_access_key", ""), - config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""), + auth_settings.access_key_id, + auth_settings.secret_access_key, + auth_settings.server_side_encryption_customer_key_base64, std::move(sse_kms_config), - std::move(headers), - S3::CredentialsConfiguration - { - config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", true)), - config.getBool(config_prefix + ".use_insecure_imds_request", config.getBool("s3.use_insecure_imds_request", false)), - config.getUInt64(config_prefix + ".expiration_window_seconds", config.getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), - config.getBool(config_prefix + ".no_sign_request", config.getBool("s3.no_sign_request", false)) - }); + auth_settings.headers, + credentials_configuration, + auth_settings.session_token); } } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.h b/src/Disks/ObjectStorages/S3/diskSettings.h index e461daa99e2..11ac64ce913 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.h +++ b/src/Disks/ObjectStorages/S3/diskSettings.h @@ -14,9 +14,19 @@ namespace DB struct S3ObjectStorageSettings; 
-std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); +std::unique_ptr getSettings( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context, + bool validate_settings = true); -std::unique_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, const S3ObjectStorageSettings & settings); +std::unique_ptr getClient( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context, + const S3ObjectStorageSettings & settings, + bool for_disk_s3, + const S3::URI * url_ = nullptr); } diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index 69f6137cd2d..e837e056acc 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -344,11 +344,6 @@ void WebObjectStorage::startup() { } -void WebObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & /* config */, const std::string & /* config_prefix */, ContextPtr /* context */) -{ -} - ObjectMetadata WebObjectStorage::getObjectMetadata(const std::string & /* path */) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Metadata is not supported for {}", getName()); diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.h b/src/Disks/ObjectStorages/Web/WebObjectStorage.h index b8ab510a6fb..9ca2950dae0 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.h +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.h @@ -3,6 +3,8 @@ #include "config.h" #include + +#include #include namespace Poco @@ -72,11 +74,6 @@ public: void startup() override; - void applyNewSettings( - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - ContextPtr context) override; - String getObjectsNamespace() const override { return ""; } std::unique_ptr cloneObjectStorage( diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 3edade639df..89a7a31d033 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -76,7 +76,7 @@ void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule esca /// Empty field, just skip spaces break; case FormatSettings::EscapingRule::Escaped: - readEscapedStringInto(out, buf); + readEscapedStringInto(out, buf); break; case FormatSettings::EscapingRule::Quoted: readQuotedFieldInto(out, buf); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 3199445864d..a7883919c4c 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -15,7 +16,7 @@ #include #include #include -#include +#include #include @@ -154,6 +155,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching; format_settings.parquet.preserve_order = settings.input_format_parquet_preserve_order; format_settings.parquet.filter_push_down = settings.input_format_parquet_filter_push_down; + format_settings.parquet.use_native_reader = settings.input_format_parquet_use_native_reader; format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; 
format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference; format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string; @@ -202,6 +204,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.tsv.try_detect_header = settings.input_format_tsv_detect_header; format_settings.tsv.skip_trailing_empty_lines = settings.input_format_tsv_skip_trailing_empty_lines; format_settings.tsv.allow_variable_number_of_columns = settings.input_format_tsv_allow_variable_number_of_columns; + format_settings.tsv.crlf_end_of_line_input = settings.input_format_tsv_crlf_end_of_line; format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals; format_settings.values.allow_data_after_semicolon = settings.input_format_values_allow_data_after_semicolon; format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions; @@ -693,21 +696,12 @@ String FormatFactory::getFormatFromFileName(String file_name) std::optional FormatFactory::tryGetFormatFromFileDescriptor(int fd) { -#ifdef OS_LINUX - std::string proc_path = fmt::format("/proc/self/fd/{}", fd); - char file_path[PATH_MAX] = {'\0'}; - if (readlink(proc_path.c_str(), file_path, sizeof(file_path) - 1) != -1) - return tryGetFormatFromFileName(file_path); + std::optional file_name = tryGetFileNameFromFileDescriptor(fd); + + if (file_name) + return tryGetFormatFromFileName(*file_name); + return std::nullopt; -#elif defined(OS_DARWIN) - char file_path[PATH_MAX] = {'\0'}; - if (fcntl(fd, F_GETPATH, file_path) != -1) - return tryGetFormatFromFileName(file_path); - return std::nullopt; -#else - (void)fd; - return std::nullopt; -#endif } String FormatFactory::getFormatFromFileDescriptor(int fd) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index f29fc51af6a..b296928e4d4 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -44,9 +44,9 @@ struct FormatSettings String column_names_for_schema_inference{}; String schema_inference_hints{}; - bool try_infer_integers = false; - bool try_infer_dates = false; - bool try_infer_datetimes = false; + bool try_infer_integers = true; + bool try_infer_dates = true; + bool try_infer_datetimes = true; bool try_infer_exponent_floats = false; enum class DateTimeInputFormat : uint8_t @@ -258,6 +258,7 @@ struct FormatSettings bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; bool filter_push_down = true; + bool use_native_reader = false; std::unordered_set skip_row_groups = {}; bool output_string_as_string = false; bool output_fixed_string_as_fixed_byte_array = true; @@ -361,6 +362,7 @@ struct FormatSettings bool try_detect_header = true; bool skip_trailing_empty_lines = false; bool allow_variable_number_of_columns = false; + bool crlf_end_of_line_input = false; } tsv{}; struct diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index 8286b24d0a6..39915b0735e 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -93,7 +93,7 @@ void NativeReader::readData(const ISerialization & serialization, ColumnPtr & co ISerialization::DeserializeBinaryBulkStatePtr state; - serialization.deserializeBinaryBulkStatePrefix(settings, state); + serialization.deserializeBinaryBulkStatePrefix(settings, state, 
nullptr); serialization.deserializeBinaryBulkWithMultipleStreams(column, rows, settings, state, nullptr); if (column->size() != rows) diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h index ff07309e248..7fd5b7476e1 100644 --- a/src/Functions/DivisionUtils.h +++ b/src/Functions/DivisionUtils.h @@ -68,7 +68,7 @@ struct DivideIntegralImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { using CastA = std::conditional_t && std::is_same_v, uint8_t, A>; using CastB = std::conditional_t && std::is_same_v, uint8_t, B>; @@ -120,7 +120,7 @@ struct ModuloImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { if constexpr (std::is_floating_point_v) { @@ -175,7 +175,7 @@ struct PositiveModuloImpl : ModuloImpl using ResultType = typename NumberTraits::ResultOfPositiveModulo::Type; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { auto res = ModuloImpl::template apply(a, b); if constexpr (is_signed_v) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 6203999fa37..5d19ba44d9b 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -284,7 +284,7 @@ struct BinaryOperation private: template - static inline void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) + static void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) { if constexpr (op_case == OpCase::Vector) c[i] = Op::template apply(a[i], b[i]); @@ -432,7 +432,7 @@ template struct FixedStringReduceOperationImpl { template - static void inline process(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt16 * __restrict result, size_t size, size_t N) + static void process(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt16 * __restrict result, size_t size, size_t N) { if constexpr (op_case == OpCase::Vector) vectorVector(a, b, result, size, N); @@ -503,7 +503,7 @@ struct StringReduceOperationImpl } } - static inline UInt64 constConst(std::string_view a, std::string_view b) + static UInt64 constConst(std::string_view a, std::string_view b) { return process( reinterpret_cast(a.data()), @@ -643,7 +643,7 @@ public: private: template - static inline void processWithRightNullmapImpl(const auto & a, const auto & b, ResultContainerType & c, size_t size, const NullMap * right_nullmap, ApplyFunc apply_func) + static void processWithRightNullmapImpl(const auto & a, const auto & b, ResultContainerType & c, size_t size, const NullMap * right_nullmap, ApplyFunc apply_func) { if (right_nullmap) { diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index 37db514fd1f..83ed874c47b 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -44,27 +44,27 @@ class DefaultJSONStringSerializer public: explicit DefaultJSONStringSerializer(ColumnString & col_str_) : col_str(col_str_) { } - inline void addRawData(const char * ptr, size_t len) + void addRawData(const char * ptr, size_t len) { out << std::string_view(ptr, len); } - inline void addRawString(std::string_view str) + void addRawString(std::string_view str) { out << str; } /// serialize the json element into stringstream - inline void addElement(const Element & element) + void addElement(const Element & element) { out << 
element.getElement(); } - inline void commit() + void commit() { auto out_str = out.str(); col_str.insertData(out_str.data(), out_str.size()); } - inline void rollback() {} + void rollback() {} private: ColumnString & col_str; std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM @@ -82,27 +82,27 @@ public: prev_offset = offsets.empty() ? 0 : offsets.back(); } /// Put the data into column's buffer directly. - inline void addRawData(const char * ptr, size_t len) + void addRawData(const char * ptr, size_t len) { chars.insert(ptr, ptr + len); } - inline void addRawString(std::string_view str) + void addRawString(std::string_view str) { chars.insert(str.data(), str.data() + str.size()); } /// serialize the json element into column's buffer directly - inline void addElement(const Element & element) + void addElement(const Element & element) { formatter.append(element.getElement()); } - inline void commit() + void commit() { chars.push_back(0); offsets.push_back(chars.size()); } - inline void rollback() + void rollback() { chars.resize(prev_offset); } diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 14745460658..524b4f82acd 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -59,7 +59,7 @@ enum class CipherMode : uint8_t template struct KeyHolder { - inline StringRef setKey(size_t cipher_key_size, StringRef key) const + StringRef setKey(size_t cipher_key_size, StringRef key) const { if (key.size != cipher_key_size) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid key size: {} expected {}", key.size, cipher_key_size); @@ -71,7 +71,7 @@ struct KeyHolder template <> struct KeyHolder { - inline StringRef setKey(size_t cipher_key_size, StringRef key) + StringRef setKey(size_t cipher_key_size, StringRef key) { if (key.size < cipher_key_size) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid key size: {} expected {}", key.size, cipher_key_size); diff --git a/src/Functions/FunctionsBitToArray.cpp b/src/Functions/FunctionsBitToArray.cpp index 566ce16d1a7..adabda1a7f8 100644 --- a/src/Functions/FunctionsBitToArray.cpp +++ b/src/Functions/FunctionsBitToArray.cpp @@ -79,7 +79,7 @@ public: private: template - inline static void writeBitmask(T x, WriteBuffer & out) + static void writeBitmask(T x, WriteBuffer & out) { using UnsignedT = make_unsigned_t; UnsignedT u_x = x; diff --git a/src/Functions/FunctionsCodingIP.cpp b/src/Functions/FunctionsCodingIP.cpp index 54f7b6dd1f4..e01967274f4 100644 --- a/src/Functions/FunctionsCodingIP.cpp +++ b/src/Functions/FunctionsCodingIP.cpp @@ -785,7 +785,7 @@ private: #include - static inline void applyCIDRMask(const char * __restrict src, char * __restrict dst_lower, char * __restrict dst_upper, UInt8 bits_to_keep) + static void applyCIDRMask(const char * __restrict src, char * __restrict dst_lower, char * __restrict dst_upper, UInt8 bits_to_keep) { __m128i mask = _mm_loadu_si128(reinterpret_cast(getCIDRMaskIPv6(bits_to_keep).data())); __m128i lower = _mm_and_si128(_mm_loadu_si128(reinterpret_cast(src)), mask); @@ -916,7 +916,7 @@ public: class FunctionIPv4CIDRToRange : public IFunction { private: - static inline std::pair applyCIDRMask(UInt32 src, UInt8 bits_to_keep) + static std::pair applyCIDRMask(UInt32 src, UInt8 bits_to_keep) { if (bits_to_keep >= 8 * sizeof(UInt32)) return { src, src }; diff --git a/src/Functions/FunctionsConsistentHashing.h b/src/Functions/FunctionsConsistentHashing.h index 6f2eec5be98..306b6395dc5 100644 --- a/src/Functions/FunctionsConsistentHashing.h +++ 
b/src/Functions/FunctionsConsistentHashing.h @@ -83,7 +83,7 @@ private: using BucketsType = typename Impl::BucketsType; template - inline BucketsType checkBucketsRange(T buckets) const + BucketsType checkBucketsRange(T buckets) const { if (unlikely(buckets <= 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} (number of buckets) must be positive number", getName()); diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index beb7e6feb47..44d0b750af9 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -38,11 +39,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -65,7 +68,6 @@ #include #include - namespace DB { @@ -574,7 +576,7 @@ ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) template struct ConvertImplGenericToString { - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const ContextPtr & context) { static_assert(std::is_same_v || std::is_same_v, "Can be used only to serialize to ColumnString or ColumnFixedString"); @@ -595,7 +597,7 @@ struct ConvertImplGenericToString auto & write_buffer = write_helper.getWriteBuffer(); - FormatSettings format_settings; + FormatSettings format_settings = context ? getFormatSettings(context) : FormatSettings{}; auto serialization = type.getDefaultSerialization(); for (size_t row = 0; row < size; ++row) { @@ -1819,7 +1821,7 @@ struct ConvertImpl template struct ConvertImplGenericFromString { - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) + static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count, const ContextPtr & context) { const IColumn & column_from = *arguments[0].column; const IDataType & data_type_to = *result_type; @@ -1827,7 +1829,7 @@ struct ConvertImplGenericFromString auto serialization = data_type_to.getDefaultSerialization(); const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - executeImpl(column_from, *res, *serialization, input_rows_count, null_map, result_type.get()); + executeImpl(column_from, *res, *serialization, input_rows_count, null_map, result_type.get(), context); return res; } @@ -1837,11 +1839,12 @@ struct ConvertImplGenericFromString const ISerialization & serialization_from, size_t input_rows_count, const PaddedPODArray * null_map, - const IDataType * result_type) + const IDataType * result_type, + const ContextPtr & context) { column_to.reserve(input_rows_count); - FormatSettings format_settings; + FormatSettings format_settings = context ? 
getFormatSettings(context) : FormatSettings{}; for (size_t i = 0; i < input_rows_count; ++i) { if (null_map && (*null_map)[i]) @@ -2298,7 +2301,7 @@ private: if constexpr (std::is_same_v) { if (from_type->getCustomSerialization()) - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context); } bool done = false; @@ -2331,7 +2334,7 @@ private: /// Generic conversion of any type to String. if (std::is_same_v) { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context); } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", @@ -3287,8 +3290,17 @@ private: if (checkAndGetDataType(from_type.get())) { if (cast_type == CastType::accurateOrNull) - return &ConvertImplGenericFromString::execute; - return &ConvertImplGenericFromString::execute; + { + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; + } + + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; } return createWrapper(from_type, to_type, requested_result_is_nullable); @@ -3451,7 +3463,10 @@ private: /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; } else if (const auto * agg_type = checkAndGetDataType(from_type_untyped.get())) { @@ -3494,7 +3509,10 @@ private: /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; } DataTypePtr from_type_holder; @@ -3585,7 +3603,10 @@ private: /// Conversion from String through parsing. 
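// Illustrative sketch, not from this patch: the change above replaces a plain function
// pointer (&ConvertImplGenericFromString::execute) with a lambda capturing `context`, so
// parsing-based conversions can use the query's FormatSettings. The names below
// (Ctx, parse_with_defaults, make_wrapper) are hypothetical and only show the pattern.
#include <functional>
#include <iostream>
#include <string>

struct Ctx { bool trim_leading_spaces = true; };            // stand-in for ContextPtr / FormatSettings

static int parse_with_defaults(const std::string & s) { return std::stoi(s); }   // old style: stateless

using Wrapper = std::function<int(const std::string &)>;

static Wrapper make_wrapper(const Ctx * context)
{
    if (!context)
        return &parse_with_defaults;                        // no context: behave as before
    return [context](const std::string & s)                 // new style: capture context, like [this] above
    {
        std::string t = s;
        if (context->trim_leading_spaces)
            t.erase(0, t.find_first_not_of(' '));
        return std::stoi(t);
    };
}

int main()
{
    Ctx ctx;
    std::cout << make_wrapper(&ctx)("  42") << '\n';        // 42
    std::cout << make_wrapper(nullptr)("7") << '\n';        // 7
}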
if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; } const auto * from_type = checkAndGetDataType(from_type_untyped.get()); @@ -3934,9 +3955,9 @@ private: } else if (checkAndGetDataType(from_type.get())) { - return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) { - auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); + auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count, context)->assumeMutable(); res->finalize(); return res; }; @@ -4053,9 +4074,9 @@ private: casted_variant_columns.reserve(variant_types.size()); for (size_t i = 0; i != variant_types.size(); ++i) { - auto variant_col = column_variant.getVariantPtrByLocalDiscriminator(i); + auto variant_col = column_variant.getVariantPtrByGlobalDiscriminator(i); ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], "" }}; - const auto & variant_wrapper = variant_wrappers[column_variant.globalDiscriminatorByLocal(i)]; + const auto & variant_wrapper = variant_wrappers[i]; casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size())); } @@ -4065,11 +4086,11 @@ private: res->reserve(input_rows_count); for (size_t i = 0; i != input_rows_count; ++i) { - auto local_discr = local_discriminators[i]; - if (local_discr == ColumnVariant::NULL_DISCRIMINATOR) + auto global_discr = column_variant.globalDiscriminatorByLocal(local_discriminators[i]); + if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) res->insertDefault(); else - res->insertFrom(*casted_variant_columns[local_discr], column_variant.offsetAt(i)); + res->insertFrom(*casted_variant_columns[global_discr], column_variant.offsetAt(i)); } return res; @@ -4109,8 +4130,8 @@ private: args[0].type = removeNullable(removeLowCardinality(args[0].type)); if (cast_type == CastType::accurateOrNull) - return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); - return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count, context); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count, context); }; } @@ -4127,7 +4148,7 @@ private: }; } - auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(*removeNullableOrLowCardinalityNullable(from_type)); + auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(removeNullableOrLowCardinalityNullable(from_type)->getName()); /// Cast String to Variant through parsing if it's not Variant(String). 
if (isStringOrFixedString(removeNullable(removeLowCardinality(from_type))) && (!variant_discr_opt || to_variant.getVariants().size() > 1)) return createStringToVariantWrapper(); @@ -4239,6 +4260,284 @@ private: return createColumnToVariantWrapper(from_type, assert_cast(*to_type)); } + WrapperType createDynamicToColumnWrapper(const DataTypePtr &) const + { + return [this] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr + { + const auto & column_dynamic = assert_cast(*arguments.front().column.get()); + const auto & variant_info = column_dynamic.getVariantInfo(); + auto variant_wrapper = createVariantToColumnWrapper(assert_cast(*variant_info.variant_type), result_type); + ColumnsWithTypeAndName args = {ColumnWithTypeAndName(column_dynamic.getVariantColumnPtr(), variant_info.variant_type, "")}; + return variant_wrapper(args, result_type, col_nullable, input_rows_count); + }; + } + + WrapperType createStringToDynamicThroughParsingWrapper() const + { + return [&](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + auto column = arguments[0].column->convertToFullColumnIfLowCardinality(); + auto args = arguments; + args[0].column = column; + + const ColumnNullable * column_nullable = nullptr; + if (isColumnNullable(*args[0].column)) + { + column_nullable = assert_cast(args[0].column.get()); + args[0].column = column_nullable->getNestedColumnPtr(); + } + + args[0].type = removeNullable(removeLowCardinality(args[0].type)); + + if (cast_type == CastType::accurateOrNull) + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count, context); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count, context); + }; + } + + std::pair getReducedVariant( + const ColumnVariant & variant_column, + const DataTypePtr & variant_type, + const std::unordered_map & variant_name_to_discriminator, + size_t max_result_num_variants, + const ColumnDynamic::Statistics & statistics = {}) const + { + const auto & variant_types = assert_cast(*variant_type).getVariants(); + /// First check if we don't exceed the limit in current Variant column. + if (variant_types.size() < max_result_num_variants || (variant_types.size() == max_result_num_variants && variant_name_to_discriminator.contains("String"))) + return {variant_column.getPtr(), variant_type}; + + /// We want to keep the most frequent variants and convert to string the rarest. + std::vector> variant_sizes; + variant_sizes.reserve(variant_types.size()); + std::optional old_string_discriminator; + /// List of variants that should be converted to a single String variant. + std::vector variants_to_convert_to_string; + for (size_t i = 0; i != variant_types.size(); ++i) + { + /// String variant won't be removed. + String variant_name = variant_types[i]->getName(); + + if (variant_name == "String") + { + old_string_discriminator = i; + /// For simplicity, add this variant to the list that will be converted to string, + /// so we will process it with other variants when constructing the new String variant. 
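// Illustrative sketch, not from this patch: the reduction above keeps the most frequent
// variants and folds everything else into one shared String variant. A minimal model of
// that selection, assuming per-variant sizes are already known (reduceVariants is a
// hypothetical helper, not the real getReducedVariant):
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

static std::vector<std::string> reduceVariants(std::vector<std::pair<std::string, size_t>> sizes, size_t max_variants)
{
    std::vector<std::string> kept;
    kept.push_back("String");                                // String always survives
    std::sort(sizes.begin(), sizes.end(),                    // largest first: most frequent survive
              [](const auto & a, const auto & b) { return a.second > b.second; });
    for (const auto & entry : sizes)
    {
        if (entry.first == "String")
            continue;                                        // slot already reserved above
        if (kept.size() < max_variants)
            kept.push_back(entry.first);                     // frequent enough: keep as-is
        // otherwise this variant would be cast to String in the real code
    }
    return kept;
}

int main()
{
    for (const auto & name : reduceVariants({{"UInt64", 1000}, {"Array(UInt64)", 5}, {"Date", 300}}, 3))
        std::cout << name << '\n';                           // String, UInt64, Date
}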
+ variants_to_convert_to_string.push_back(i); + } + else + { + size_t size = 0; + if (statistics.data.empty()) + size = variant_column.getVariantByGlobalDiscriminator(i).size(); + else + size = statistics.data.at(variant_name); + variant_sizes.emplace_back(size, i); + } + } + + /// Sort variants by sizes, so we will keep the most frequent. + std::sort(variant_sizes.begin(), variant_sizes.end(), std::greater()); + + DataTypes remaining_variants; + remaining_variants.reserve(max_result_num_variants); + /// Add String variant in advance. + remaining_variants.push_back(std::make_shared()); + for (auto [_, discr] : variant_sizes) + { + if (remaining_variants.size() != max_result_num_variants) + remaining_variants.push_back(variant_types[discr]); + else + variants_to_convert_to_string.push_back(discr); + } + + auto reduced_variant = std::make_shared(remaining_variants); + const auto & new_variants = reduced_variant->getVariants(); + /// To construct reduced variant column we will need mapping from old to new discriminators. + std::vector old_to_new_discriminators_mapping; + old_to_new_discriminators_mapping.resize(variant_types.size()); + ColumnVariant::Discriminator string_variant_discriminator = 0; + for (size_t i = 0; i != new_variants.size(); ++i) + { + String variant_name = new_variants[i]->getName(); + if (variant_name == "String") + { + string_variant_discriminator = i; + for (auto discr : variants_to_convert_to_string) + old_to_new_discriminators_mapping[discr] = i; + } + else + { + auto old_discr = variant_name_to_discriminator.at(variant_name); + old_to_new_discriminators_mapping[old_discr] = i; + } + } + + /// Convert all reduced variants to String. + std::unordered_map variants_converted_to_string; + variants_converted_to_string.reserve(variants_to_convert_to_string.size()); + size_t string_variant_size = 0; + for (auto discr : variants_to_convert_to_string) + { + auto string_type = std::make_shared(); + auto string_wrapper = prepareUnpackDictionaries(variant_types[discr], string_type); + auto column_to_convert = ColumnWithTypeAndName(variant_column.getVariantPtrByGlobalDiscriminator(discr), variant_types[discr], ""); + ColumnsWithTypeAndName args = {column_to_convert}; + auto variant_string_column = string_wrapper(args, string_type, nullptr, column_to_convert.column->size()); + string_variant_size += variant_string_column->size(); + variants_converted_to_string[discr] = variant_string_column; + } + + /// Create new discriminators and offsets and fill new String variant according to old discriminators. 
+ auto string_variant = ColumnString::create(); + string_variant->reserve(string_variant_size); + auto new_discriminators_column = variant_column.getLocalDiscriminatorsPtr()->cloneEmpty(); + auto & new_discriminators_data = assert_cast(*new_discriminators_column).getData(); + new_discriminators_data.reserve(variant_column.size()); + auto new_offsets = variant_column.getOffsetsPtr()->cloneEmpty(); + auto & new_offsets_data = assert_cast(*new_offsets).getData(); + new_offsets_data.reserve(variant_column.size()); + const auto & old_local_discriminators = variant_column.getLocalDiscriminators(); + const auto & old_offsets = variant_column.getOffsets(); + for (size_t i = 0; i != old_local_discriminators.size(); ++i) + { + auto old_discr = variant_column.globalDiscriminatorByLocal(old_local_discriminators[i]); + + if (old_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + new_discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR); + new_offsets_data.push_back(0); + continue; + } + + auto new_discr = old_to_new_discriminators_mapping[old_discr]; + new_discriminators_data.push_back(new_discr); + if (new_discr != string_variant_discriminator) + { + new_offsets_data.push_back(old_offsets[i]); + } + else + { + new_offsets_data.push_back(string_variant->size()); + string_variant->insertFrom(*variants_converted_to_string[old_discr], old_offsets[i]); + } + } + + /// Create new list of variant columns. + Columns new_variant_columns; + new_variant_columns.resize(new_variants.size()); + for (size_t i = 0; i != variant_types.size(); ++i) + { + auto new_discr = old_to_new_discriminators_mapping[i]; + if (new_discr != string_variant_discriminator) + new_variant_columns[new_discr] = variant_column.getVariantPtrByGlobalDiscriminator(i); + } + new_variant_columns[string_variant_discriminator] = std::move(string_variant); + return {ColumnVariant::create(std::move(new_discriminators_column), std::move(new_offsets), new_variant_columns), reduced_variant}; + } + + WrapperType createVariantToDynamicWrapper(const DataTypePtr & from_type, const DataTypeDynamic & dynamic_type) const + { + const auto & from_variant_type = assert_cast(*from_type); + size_t max_dynamic_types = dynamic_type.getMaxDynamicTypes(); + const auto & variants = from_variant_type.getVariants(); + std::unordered_map variant_name_to_discriminator; + variant_name_to_discriminator.reserve(variants.size()); + for (size_t i = 0; i != variants.size(); ++i) + variant_name_to_discriminator[variants[i]->getName()] = i; + + return [from_type, max_dynamic_types, variant_name_to_discriminator, this] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr + { + const auto & variant_column = assert_cast(*arguments.front().column); + auto [reduced_variant_column, reduced_variant_type] = getReducedVariant(variant_column, from_type, variant_name_to_discriminator, max_dynamic_types); + return ColumnDynamic::create(reduced_variant_column, reduced_variant_type, max_dynamic_types); + }; + } + + WrapperType createColumnToDynamicWrapper(const DataTypePtr & from_type, const DataTypeDynamic & dynamic_type) const + { + if (const auto * variant_type = typeid_cast(from_type.get())) + return createVariantToDynamicWrapper(from_type, dynamic_type); + + if (dynamic_type.getMaxDynamicTypes() == 1) + { + DataTypePtr string_type = std::make_shared(); + if (from_type->isNullable()) + string_type = makeNullable(string_type); + auto string_wrapper = prepareUnpackDictionaries(from_type, string_type); + auto variant_type = 
std::make_shared(DataTypes{removeNullable(string_type)}); + auto variant_wrapper = createColumnToVariantWrapper(string_type, *variant_type); + return [string_wrapper, variant_wrapper, string_type, variant_type, max_dynamic_types=dynamic_type.getMaxDynamicTypes()] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr + { + auto string_column = string_wrapper(arguments, string_type, col_nullable, input_rows_count); + auto column = ColumnWithTypeAndName(string_column, string_type, ""); + ColumnsWithTypeAndName args = {column}; + auto variant_column = variant_wrapper(args, variant_type, nullptr, string_column->size()); + return ColumnDynamic::create(variant_column, variant_type, max_dynamic_types); + }; + } + + if (context && context->getSettingsRef().cast_string_to_dynamic_use_inference && isStringOrFixedString(removeNullable(removeLowCardinality(from_type)))) + return createStringToDynamicThroughParsingWrapper(); + + auto variant_type = std::make_shared(DataTypes{removeNullableOrLowCardinalityNullable(from_type)}); + auto variant_wrapper = createColumnToVariantWrapper(from_type, *variant_type); + return [variant_wrapper, variant_type, max_dynamic_types=dynamic_type.getMaxDynamicTypes()] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr + { + auto variant_res = variant_wrapper(arguments, variant_type, col_nullable, input_rows_count); + return ColumnDynamic::create(variant_res, variant_type, max_dynamic_types); + }; + } + + WrapperType createDynamicToDynamicWrapper(const DataTypeDynamic & from_dynamic, const DataTypeDynamic & to_dynamic) const + { + size_t from_max_types = from_dynamic.getMaxDynamicTypes(); + size_t to_max_types = to_dynamic.getMaxDynamicTypes(); + if (from_max_types == to_max_types) + return createIdentityWrapper(from_dynamic.getPtr()); + + if (to_max_types > from_max_types) + { + return [to_max_types] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr + { + const auto & column_dynamic = assert_cast(*arguments[0].column); + return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), to_max_types); + }; + } + + return [to_max_types, this] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr + { + const auto & column_dynamic = assert_cast(*arguments[0].column); + auto [reduced_variant_column, reduced_variant_type] = getReducedVariant( + column_dynamic.getVariantColumn(), + column_dynamic.getVariantInfo().variant_type, + column_dynamic.getVariantInfo().variant_name_to_discriminator, + to_max_types, + column_dynamic.getStatistics()); + return ColumnDynamic::create(reduced_variant_column, reduced_variant_type, to_max_types); + }; + } + + /// Wrapper for conversion to/from Dynamic type + WrapperType createDynamicWrapper(const DataTypePtr & from_type, const DataTypePtr & to_type) const + { + if (const auto * from_dynamic = checkAndGetDataType(from_type.get())) + { + if (const auto * to_dynamic = checkAndGetDataType(to_type.get())) + return createDynamicToDynamicWrapper(*from_dynamic, *to_dynamic); + + return createDynamicToColumnWrapper(to_type); + } + + return createColumnToDynamicWrapper(from_type, *checkAndGetDataType(to_type.get())); + } + template WrapperType createEnumWrapper(const DataTypePtr & from_type, const DataTypeEnum * to_type) const { @@ -4418,8 +4717,11 @@ 
private: WrapperType prepareUnpackDictionaries(const DataTypePtr & from_type, const DataTypePtr & to_type) const { - /// Conversion from/to Variant data type is processed in a special way. + /// Conversion from/to Variant/Dynamic data type is processed in a special way. /// We don't need to remove LowCardinality/Nullable. + if (isDynamic(to_type) || isDynamic(from_type)) + return createDynamicWrapper(from_type, to_type); + if (isVariant(to_type) || isVariant(from_type)) return createVariantWrapper(from_type, to_type); @@ -4733,7 +5035,7 @@ private: if (to_type->getCustomSerialization() && to_type->getCustomName()) { - ret = [this, requested_result_is_nullable]( + ret = [requested_result_is_nullable, this]( ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, @@ -4744,9 +5046,9 @@ private: wrapped_result_type = makeNullable(result_type); if (this->cast_type == CastType::accurateOrNull) return ConvertImplGenericFromString::execute( - arguments, wrapped_result_type, column_nullable, input_rows_count); + arguments, wrapped_result_type, column_nullable, input_rows_count, context); return ConvertImplGenericFromString::execute( - arguments, wrapped_result_type, column_nullable, input_rows_count); + arguments, wrapped_result_type, column_nullable, input_rows_count, context); }; return true; } @@ -4782,9 +5084,9 @@ private: } else if (from_type->getCustomSerialization()) { - ret = [](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + ret = [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context); }; return true; } diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 3d8a11319c4..27717ea3611 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -49,6 +49,8 @@ #include #include +#include + namespace DB { @@ -75,17 +77,29 @@ namespace impl ColumnPtr key0; ColumnPtr key1; bool is_const; + const ColumnArray::Offsets * offsets{}; size_t size() const { assert(key0 && key1); assert(key0->size() == key1->size()); + assert(offsets == nullptr || offsets->size() == key0->size()); + if (offsets != nullptr) + return offsets->back(); return key0->size(); } SipHashKey getKey(size_t i) const { if (is_const) i = 0; + if (offsets != nullptr) + { + const auto *const begin = offsets->begin(); + const auto * upper = std::upper_bound(begin, offsets->end(), i); + if (upper == offsets->end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "offset {} not found in function SipHashKeyColumns::getKey", i); + i = upper - begin; + } const auto & key0data = assert_cast(*key0).getData(); const auto & key1data = assert_cast(*key1).getData(); return {key0data[i], key1data[i]}; @@ -1112,7 +1126,15 @@ private: typename ColumnVector::Container vec_temp(nested_size); bool nested_is_first = true; - executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first); + + if constexpr (Keyed) + { + KeyColumnsType key_cols_tmp{key_cols}; + key_cols_tmp.offsets = &offsets; + executeForArgument(key_cols_tmp, nested_type, nested_column, vec_temp, nested_is_first); + } + else + executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first); const 
size_t size = offsets.size(); diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 7e7ae76d6eb..2f5ce6deebf 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -170,7 +170,7 @@ public: : vec(in[in.size() - N]->getData()), next(in) {} /// Returns a combination of values in the i-th row of all columns stored in the constructor. - inline ResultValueType apply(const size_t i) const + ResultValueType apply(const size_t i) const { const auto a = !!vec[i]; return Op::apply(a, next.apply(i)); @@ -190,7 +190,7 @@ public: explicit AssociativeApplierImpl(const UInt8ColumnPtrs & in) : vec(in[in.size() - 1]->getData()) {} - inline ResultValueType apply(const size_t i) const { return !!vec[i]; } + ResultValueType apply(const size_t i) const { return !!vec[i]; } private: const UInt8Container & vec; @@ -291,7 +291,7 @@ public: } /// Returns a combination of values in the i-th row of all columns stored in the constructor. - inline ResultValueType apply(const size_t i) const + ResultValueType apply(const size_t i) const { return Op::ternaryApply(vec[i], next.apply(i)); } @@ -315,7 +315,7 @@ public: TernaryValueBuilder::build(in[in.size() - 1], vec.data()); } - inline ResultValueType apply(const size_t i) const { return vec[i]; } + ResultValueType apply(const size_t i) const { return vec[i]; } private: UInt8Container vec; diff --git a/src/Functions/FunctionsLogical.h b/src/Functions/FunctionsLogical.h index 41464329f79..3c2eb3ee0b8 100644 --- a/src/Functions/FunctionsLogical.h +++ b/src/Functions/FunctionsLogical.h @@ -84,47 +84,47 @@ struct AndImpl { using ResultType = UInt8; - static inline constexpr bool isSaturable() { return true; } + static constexpr bool isSaturable() { return true; } /// Final value in two-valued logic (no further operations with True, False will change this value) - static inline constexpr bool isSaturatedValue(bool a) { return !a; } + static constexpr bool isSaturatedValue(bool a) { return !a; } /// Final value in three-valued logic (no further operations with True, False, Null will change this value) - static inline constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::False; } + static constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::False; } - static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a & b; } + static constexpr ResultType apply(UInt8 a, UInt8 b) { return a & b; } - static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::min(a, b); } + static constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::min(a, b); } /// Will use three-valued logic for NULLs (see above) or default implementation (any operation with NULL returns NULL). 
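// Illustrative sketch, not from this patch: the ternaryApply functions above implement
// three-valued logic as min/max over an ordering False < Null < True. The numeric values
// here are assumptions for the illustration, not the library's actual Ternary constants.
#include <algorithm>
#include <cstdint>
#include <iostream>

namespace ternary
{
    constexpr uint8_t False = 0, Null = 1, True = 2;         // any encoding with False < Null < True works
    constexpr uint8_t And(uint8_t a, uint8_t b) { return std::min(a, b); }   // False is saturating
    constexpr uint8_t Or(uint8_t a, uint8_t b)  { return std::max(a, b); }   // True is saturating
}

int main()
{
    using namespace ternary;
    std::cout << int(And(True, Null))  << '\n';   // 1: TRUE AND NULL  -> NULL
    std::cout << int(And(False, Null)) << '\n';   // 0: FALSE AND NULL -> FALSE
    std::cout << int(Or(False, Null))  << '\n';   // 1: FALSE OR NULL  -> NULL
    std::cout << int(Or(True, Null))   << '\n';   // 2: TRUE OR NULL   -> TRUE
}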
- static inline constexpr bool specialImplementationForNulls() { return true; } + static constexpr bool specialImplementationForNulls() { return true; } }; struct OrImpl { using ResultType = UInt8; - static inline constexpr bool isSaturable() { return true; } - static inline constexpr bool isSaturatedValue(bool a) { return a; } - static inline constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::True; } - static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a | b; } - static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::max(a, b); } - static inline constexpr bool specialImplementationForNulls() { return true; } + static constexpr bool isSaturable() { return true; } + static constexpr bool isSaturatedValue(bool a) { return a; } + static constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::True; } + static constexpr ResultType apply(UInt8 a, UInt8 b) { return a | b; } + static constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::max(a, b); } + static constexpr bool specialImplementationForNulls() { return true; } }; struct XorImpl { using ResultType = UInt8; - static inline constexpr bool isSaturable() { return false; } - static inline constexpr bool isSaturatedValue(bool) { return false; } - static inline constexpr bool isSaturatedValueTernary(UInt8) { return false; } - static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a != b; } - static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return a != b; } - static inline constexpr bool specialImplementationForNulls() { return false; } + static constexpr bool isSaturable() { return false; } + static constexpr bool isSaturatedValue(bool) { return false; } + static constexpr bool isSaturatedValueTernary(UInt8) { return false; } + static constexpr ResultType apply(UInt8 a, UInt8 b) { return a != b; } + static constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return a != b; } + static constexpr bool specialImplementationForNulls() { return false; } #if USE_EMBEDDED_COMPILER - static inline llvm::Value * apply(llvm::IRBuilder<> & builder, llvm::Value * a, llvm::Value * b) + static llvm::Value * apply(llvm::IRBuilder<> & builder, llvm::Value * a, llvm::Value * b) { return builder.CreateXor(a, b); } @@ -136,13 +136,13 @@ struct NotImpl { using ResultType = UInt8; - static inline ResultType apply(A a) + static ResultType apply(A a) { return !static_cast(a); } #if USE_EMBEDDED_COMPILER - static inline llvm::Value * apply(llvm::IRBuilder<> & builder, llvm::Value * a) + static llvm::Value * apply(llvm::IRBuilder<> & builder, llvm::Value * a) { return builder.CreateNot(a); } diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 99f3a14dfec..1f20fbff24e 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -296,7 +296,7 @@ class FloatRoundingComputation : public BaseFloatRoundingComputation using Base = BaseFloatRoundingComputation; public: - static inline void compute(const T * __restrict in, const typename Base::VectorType & scale, T * __restrict out) + static void compute(const T * __restrict in, const typename Base::VectorType & scale, T * __restrict out) { auto val = Base::load(in); diff --git a/src/Functions/FunctionsStringSimilarity.cpp b/src/Functions/FunctionsStringSimilarity.cpp index aadf5c246fc..7b3f2337c89 100644 --- a/src/Functions/FunctionsStringSimilarity.cpp +++ b/src/Functions/FunctionsStringSimilarity.cpp @@ -275,7 +275,7 @@ struct NgramDistanceImpl } 
template - static inline auto dispatchSearcher(Callback callback, Args &&... args) + static auto dispatchSearcher(Callback callback, Args &&... args) { if constexpr (!UTF8) return callback(std::forward(args)..., readASCIICodePoints, calculateASCIIHash); diff --git a/src/Functions/FunctionsTimeWindow.h b/src/Functions/FunctionsTimeWindow.h index 6183d25c8bd..7522bd374a2 100644 --- a/src/Functions/FunctionsTimeWindow.h +++ b/src/Functions/FunctionsTimeWindow.h @@ -97,7 +97,7 @@ template<> \ template <> \ struct AddTime \ { \ - static inline auto execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) \ + static auto execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) \ { \ return time_zone.add##INTERVAL_KIND##s(ExtendedDayNum(d), delta); \ } \ @@ -110,7 +110,7 @@ template<> \ template <> struct AddTime { - static inline NO_SANITIZE_UNDEFINED ExtendedDayNum execute(UInt16 d, UInt64 delta, const DateLUTImpl &) + static NO_SANITIZE_UNDEFINED ExtendedDayNum execute(UInt16 d, UInt64 delta, const DateLUTImpl &) { return ExtendedDayNum(static_cast(d + delta * 7)); } @@ -120,7 +120,7 @@ template<> \ template <> \ struct AddTime \ { \ - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) \ + static NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) \ { return static_cast(t + delta * (INTERVAL)); } \ }; ADD_TIME(Day, 86400) @@ -133,7 +133,7 @@ template<> \ template <> \ struct AddTime \ { \ - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \ + static NO_SANITIZE_UNDEFINED Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \ { \ if (scale < (DEF_SCALE)) \ { \ diff --git a/src/Functions/GCDLCMImpl.h b/src/Functions/GCDLCMImpl.h index df531363c31..094c248497b 100644 --- a/src/Functions/GCDLCMImpl.h +++ b/src/Functions/GCDLCMImpl.h @@ -26,7 +26,7 @@ struct GCDLCMImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger::Type(a), typename NumberTraits::ToInteger::Type(b)); throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger::Type(b), typename NumberTraits::ToInteger::Type(a)); diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp index eb7ef4abe56..91861e8bbd2 100644 --- a/src/Functions/GregorianDate.cpp +++ b/src/Functions/GregorianDate.cpp @@ -20,12 +20,12 @@ namespace ErrorCodes namespace { - inline constexpr bool is_leap_year(int32_t year) + constexpr bool is_leap_year(int32_t year) { return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0)); } - inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) + constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) { switch (month) { @@ -49,7 +49,7 @@ namespace /** Integer division truncated toward negative infinity. */ template - inline constexpr I div(I x, J y) + constexpr I div(I x, J y) { const auto y_cast = static_cast(y); if (x > 0 && y_cast < 0) @@ -63,7 +63,7 @@ namespace /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x. */ template - inline constexpr I mod(I x, J y) + constexpr I mod(I x, J y) { const auto y_cast = static_cast(y); const auto r = x % y_cast; @@ -76,7 +76,7 @@ namespace /** Like std::min(), but the type of operands may differ. */ template - inline constexpr I min(I x, J y) + constexpr I min(I x, J y) { const auto y_cast = static_cast(y); return x < y_cast ? 
x : y_cast; diff --git a/src/Functions/TransformDateTime64.h b/src/Functions/TransformDateTime64.h index 896e9d8ca48..b52ccd3cce0 100644 --- a/src/Functions/TransformDateTime64.h +++ b/src/Functions/TransformDateTime64.h @@ -53,7 +53,7 @@ public: {} template - inline auto NO_SANITIZE_UNDEFINED execute(const DateTime64 & t, Args && ... args) const + auto NO_SANITIZE_UNDEFINED execute(const DateTime64 & t, Args && ... args) const { /// Type conversion from float to integer may be required. /// We are Ok with implementation specific result for out of range and denormals conversion. @@ -90,14 +90,14 @@ public: template requires(!std::same_as) - inline auto execute(const T & t, Args &&... args) const + auto execute(const T & t, Args &&... args) const { return wrapped_transform.execute(t, std::forward(args)...); } template - inline auto NO_SANITIZE_UNDEFINED executeExtendedResult(const DateTime64 & t, Args && ... args) const + auto NO_SANITIZE_UNDEFINED executeExtendedResult(const DateTime64 & t, Args && ... args) const { /// Type conversion from float to integer may be required. /// We are Ok with implementation specific result for out of range and denormals conversion. @@ -131,7 +131,7 @@ public: template requires (!std::same_as) - inline auto executeExtendedResult(const T & t, Args && ... args) const + auto executeExtendedResult(const T & t, Args && ... args) const { return wrapped_transform.executeExtendedResult(t, std::forward(args)...); } diff --git a/src/Functions/abs.cpp b/src/Functions/abs.cpp index 0cd313caf1e..9ac2363f765 100644 --- a/src/Functions/abs.cpp +++ b/src/Functions/abs.cpp @@ -12,7 +12,7 @@ struct AbsImpl using ResultType = std::conditional_t, A, typename NumberTraits::ResultOfAbs::Type>; static constexpr bool allow_string_or_fixed_string = false; - static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + static NO_SANITIZE_UNDEFINED ResultType apply(A a) { if constexpr (is_decimal) return a < A(0) ? 
A(-a) : a; diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index 395f96bbffb..fa9b3dc92dd 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -322,7 +322,7 @@ private: } template - static inline void invokeCheckNullMaps( + static void invokeCheckNullMaps( const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & str_offsets, const ColumnString::Chars & values, OffsetT item_offsets, @@ -339,7 +339,7 @@ private: } public: - static inline void process( + static void process( const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & string_offsets, const ColumnString::Chars & item_values, Offset item_offsets, PaddedPODArray & result, @@ -348,7 +348,7 @@ public: invokeCheckNullMaps(data, offsets, string_offsets, item_values, item_offsets, result, data_map, item_map); } - static inline void process( + static void process( const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & string_offsets, const ColumnString::Chars & item_values, const ColumnString::Offsets & item_offsets, PaddedPODArray & result, @@ -467,10 +467,10 @@ private: NullMaps maps; ResultColumnPtr result { ResultColumnType::create() }; - inline void moveResult() { result_column = std::move(result); } + void moveResult() { result_column = std::move(result); } }; - static inline bool allowArguments(const DataTypePtr & inner_type, const DataTypePtr & arg) + static bool allowArguments(const DataTypePtr & inner_type, const DataTypePtr & arg) { auto inner_type_decayed = removeNullable(removeLowCardinality(inner_type)); auto arg_decayed = removeNullable(removeLowCardinality(arg)); @@ -633,7 +633,7 @@ private: * (s1, s1, s2, ...), (s2, s1, s2, ...), (s3, s1, s2, ...) */ template - static inline ColumnPtr executeIntegral(const ColumnsWithTypeAndName & arguments) + static ColumnPtr executeIntegral(const ColumnsWithTypeAndName & arguments) { const ColumnArray * const left = checkAndGetColumn(arguments[0].column.get()); @@ -658,14 +658,14 @@ private: } template - static inline bool executeIntegral(ExecutionData& data) + static bool executeIntegral(ExecutionData& data) { return (executeIntegralExpanded(data) || ...); } /// Invoke executeIntegralImpl with such parameters: (A, other1), (A, other2), ... 
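// Illustrative sketch, not from this patch: the comment above describes trying one concrete
// type after another until one matches, which the code expresses as a fold over `||`. A
// self-contained model of that dispatch pattern (tryAs/dispatch are hypothetical names):
#include <any>
#include <iostream>

template <typename T>
bool tryAs(const std::any & value)
{
    if (const T * typed = std::any_cast<T>(&value))
    {
        std::cout << "handled, value = " << *typed << '\n';
        return true;                                         // handled: the fold short-circuits here
    }
    return false;                                            // wrong type: fall through to the next one
}

// Expands to tryAs<T1>(value) || tryAs<T2>(value) || ..., mirroring (executeIntegralImpl(data) || ...)
template <typename... Ts>
bool dispatch(const std::any & value)
{
    return (tryAs<Ts>(value) || ...);
}

int main()
{
    std::any v = 3.5;
    if (!dispatch<int, unsigned, double>(v))                 // the third attempt matches
        std::cout << "unsupported type\n";
}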
template - static inline bool executeIntegralExpanded(ExecutionData& data) + static bool executeIntegralExpanded(ExecutionData& data) { return (executeIntegralImpl(data) || ...); } diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index e87eff6add1..ca1e8f21aee 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -25,19 +25,19 @@ struct L1Norm struct ConstParams {}; template - inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) + static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return result + fabs(value); } template - inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) { return result + other_result; } template - inline static ResultType finalize(ResultType result, const ConstParams &) + static ResultType finalize(ResultType result, const ConstParams &) { return result; } @@ -50,19 +50,19 @@ struct L2Norm struct ConstParams {}; template - inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) + static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return result + value * value; } template - inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) { return result + other_result; } template - inline static ResultType finalize(ResultType result, const ConstParams &) + static ResultType finalize(ResultType result, const ConstParams &) { return sqrt(result); } @@ -73,7 +73,7 @@ struct L2SquaredNorm : L2Norm static constexpr auto name = "L2Squared"; template - inline static ResultType finalize(ResultType result, const ConstParams &) + static ResultType finalize(ResultType result, const ConstParams &) { return result; } @@ -91,19 +91,19 @@ struct LpNorm }; template - inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams & params) + static ResultType accumulate(ResultType result, ResultType value, const ConstParams & params) { return result + static_cast(std::pow(fabs(value), params.power)); } template - inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) { return result + other_result; } template - inline static ResultType finalize(ResultType result, const ConstParams & params) + static ResultType finalize(ResultType result, const ConstParams & params) { return static_cast(std::pow(result, params.inverted_power)); } @@ -116,19 +116,19 @@ struct LinfNorm struct ConstParams {}; template - inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) + static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return fmax(result, fabs(value)); } template - inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) { return fmax(result, other_result); } template - inline static ResultType finalize(ResultType result, const ConstParams &) + static ResultType finalize(ResultType result, const ConstParams &) { return result; } diff --git 
a/src/Functions/bitAnd.cpp b/src/Functions/bitAnd.cpp index 8efc5181919..c6ab9023142 100644 --- a/src/Functions/bitAnd.cpp +++ b/src/Functions/bitAnd.cpp @@ -20,7 +20,7 @@ struct BitAndImpl static constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { return static_cast(a) & static_cast(b); } @@ -28,7 +28,7 @@ struct BitAndImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitAndImpl expected an integral type"); diff --git a/src/Functions/bitBoolMaskAnd.cpp b/src/Functions/bitBoolMaskAnd.cpp index 11c0c1d1b7d..bd89b6eb69a 100644 --- a/src/Functions/bitBoolMaskAnd.cpp +++ b/src/Functions/bitBoolMaskAnd.cpp @@ -25,7 +25,7 @@ struct BitBoolMaskAndImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) + static Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) { // Should be a logical error, but this function is callable from SQL. // Need to investigate this. diff --git a/src/Functions/bitBoolMaskOr.cpp b/src/Functions/bitBoolMaskOr.cpp index 7940bf3e2ca..1ddf2d258f8 100644 --- a/src/Functions/bitBoolMaskOr.cpp +++ b/src/Functions/bitBoolMaskOr.cpp @@ -25,7 +25,7 @@ struct BitBoolMaskOrImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) + static Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) { if constexpr (!std::is_same_v || !std::is_same_v) // Should be a logical error, but this function is callable from SQL. diff --git a/src/Functions/bitCount.cpp b/src/Functions/bitCount.cpp index f1a3ac897c1..68555b1386c 100644 --- a/src/Functions/bitCount.cpp +++ b/src/Functions/bitCount.cpp @@ -13,7 +13,7 @@ struct BitCountImpl using ResultType = std::conditional_t<(sizeof(A) * 8 >= 256), UInt16, UInt8>; static constexpr bool allow_string_or_fixed_string = true; - static inline ResultType apply(A a) + static ResultType apply(A a) { /// We count bits in the value representation in memory. For example, we support floats. /// We need to avoid sign-extension when converting signed numbers to larger type. So, uint8_t(-1) has 8 bits. diff --git a/src/Functions/bitHammingDistance.cpp b/src/Functions/bitHammingDistance.cpp index f00f38b61af..f8a1a95ae14 100644 --- a/src/Functions/bitHammingDistance.cpp +++ b/src/Functions/bitHammingDistance.cpp @@ -19,7 +19,7 @@ struct BitHammingDistanceImpl static constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { /// Note: it's unspecified if signed integers should be promoted with sign-extension or with zero-fill. /// This behavior can change in the future. 
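As context for the `static inline` → `static` rewrites above: a member function defined inside its class body is already implicitly inline, so the explicit `inline` specifier carries no additional meaning there and dropping it does not change linkage or codegen. A minimal standalone sketch of this (illustrative only, not part of the patch; `ExampleImpl` is a hypothetical stand-in for the *Impl structs):

#include <cassert>

struct ExampleImpl
{
    static int apply(int a, int b) { return a & b; }              // implicitly inline: defined in the class body
    static inline int applyLegacy(int a, int b) { return a & b; } // redundant `inline`, identical semantics
};

int main()
{
    // Both spellings produce the same result and the same linkage; the diff is purely cosmetic.
    assert(ExampleImpl::apply(6, 3) == ExampleImpl::applyLegacy(6, 3));
    return 0;
}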
diff --git a/src/Functions/bitNot.cpp b/src/Functions/bitNot.cpp index 62ebdc7c52a..44dc77bb7bb 100644 --- a/src/Functions/bitNot.cpp +++ b/src/Functions/bitNot.cpp @@ -19,7 +19,7 @@ struct BitNotImpl using ResultType = typename NumberTraits::ResultOfBitNot::Type; static constexpr bool allow_string_or_fixed_string = true; - static inline ResultType NO_SANITIZE_UNDEFINED apply(A a) + static ResultType NO_SANITIZE_UNDEFINED apply(A a) { return ~static_cast(a); } @@ -27,7 +27,7 @@ struct BitNotImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { if (!arg->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitNotImpl expected an integral type"); diff --git a/src/Functions/bitOr.cpp b/src/Functions/bitOr.cpp index 9e19fc55219..22ce15d892d 100644 --- a/src/Functions/bitOr.cpp +++ b/src/Functions/bitOr.cpp @@ -19,7 +19,7 @@ struct BitOrImpl static constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { return static_cast(a) | static_cast(b); } @@ -27,7 +27,7 @@ struct BitOrImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitOrImpl expected an integral type"); diff --git a/src/Functions/bitRotateLeft.cpp b/src/Functions/bitRotateLeft.cpp index c72466b8d49..2fe2c4e0f1d 100644 --- a/src/Functions/bitRotateLeft.cpp +++ b/src/Functions/bitRotateLeft.cpp @@ -20,7 +20,7 @@ struct BitRotateLeftImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Bit rotate is not implemented for big integers"); @@ -32,7 +32,7 @@ struct BitRotateLeftImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitRotateLeftImpl expected an integral type"); diff --git a/src/Functions/bitRotateRight.cpp b/src/Functions/bitRotateRight.cpp index 045758f9a31..a2f0fe12324 100644 --- a/src/Functions/bitRotateRight.cpp +++ b/src/Functions/bitRotateRight.cpp @@ -20,7 +20,7 @@ struct BitRotateRightImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Bit rotate is not implemented for big integers"); @@ -32,7 +32,7 @@ struct BitRotateRightImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value 
* left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitRotateRightImpl expected an integral type"); diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 7b3748edb5c..c366a1ecb44 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -20,7 +20,7 @@ struct BitShiftLeftImpl static const constexpr bool allow_string_integer = true; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); @@ -145,7 +145,7 @@ struct BitShiftLeftImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitShiftLeftImpl expected an integral type"); diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 21a0f7584aa..1c37cd3bf4c 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -21,7 +21,7 @@ struct BitShiftRightImpl static const constexpr bool allow_string_integer = true; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); @@ -31,7 +31,7 @@ struct BitShiftRightImpl return static_cast(a) >> static_cast(b); } - static inline NO_SANITIZE_UNDEFINED void bitShiftRightForBytes(const UInt8 * op_pointer, const UInt8 * begin, UInt8 * out, const size_t shift_right_bits) + static NO_SANITIZE_UNDEFINED void bitShiftRightForBytes(const UInt8 * op_pointer, const UInt8 * begin, UInt8 * out, const size_t shift_right_bits) { while (op_pointer > begin) { @@ -123,7 +123,7 @@ struct BitShiftRightImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitShiftRightImpl expected an integral type"); diff --git a/src/Functions/bitSwapLastTwo.cpp b/src/Functions/bitSwapLastTwo.cpp index d8957598c62..4ff436d5708 100644 --- a/src/Functions/bitSwapLastTwo.cpp +++ b/src/Functions/bitSwapLastTwo.cpp @@ -21,7 +21,7 @@ struct BitSwapLastTwoImpl using ResultType = UInt8; static constexpr const bool allow_string_or_fixed_string = false; - static inline ResultType NO_SANITIZE_UNDEFINED apply([[maybe_unused]] A a) + static ResultType NO_SANITIZE_UNDEFINED apply([[maybe_unused]] A a) { if constexpr (!std::is_same_v) // Should be a logical error, but this function is callable from SQL. 
@@ -35,7 +35,7 @@ struct BitSwapLastTwoImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; -static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) +static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { if (!arg->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "__bitSwapLastTwo expected an integral type"); diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index 4c9c6aa2dfb..78ec9c8b773 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -21,7 +21,7 @@ struct BitTestImpl static const constexpr bool allow_string_integer = false; template - NO_SANITIZE_UNDEFINED static inline Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + NO_SANITIZE_UNDEFINED static Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument"); diff --git a/src/Functions/bitTestAll.cpp b/src/Functions/bitTestAll.cpp index a2dcef3eb96..92f63bfa262 100644 --- a/src/Functions/bitTestAll.cpp +++ b/src/Functions/bitTestAll.cpp @@ -9,7 +9,7 @@ namespace struct BitTestAllImpl { template - static inline UInt8 apply(A a, B b) { return (a & b) == b; } + static UInt8 apply(A a, B b) { return (a & b) == b; } }; struct NameBitTestAll { static constexpr auto name = "bitTestAll"; }; diff --git a/src/Functions/bitTestAny.cpp b/src/Functions/bitTestAny.cpp index 6b20d6c184c..c8f445d524e 100644 --- a/src/Functions/bitTestAny.cpp +++ b/src/Functions/bitTestAny.cpp @@ -9,7 +9,7 @@ namespace struct BitTestAnyImpl { template - static inline UInt8 apply(A a, B b) { return (a & b) != 0; } + static UInt8 apply(A a, B b) { return (a & b) != 0; } }; struct NameBitTestAny { static constexpr auto name = "bitTestAny"; }; diff --git a/src/Functions/bitWrapperFunc.cpp b/src/Functions/bitWrapperFunc.cpp index 99c06172c30..d243a6724a8 100644 --- a/src/Functions/bitWrapperFunc.cpp +++ b/src/Functions/bitWrapperFunc.cpp @@ -21,7 +21,7 @@ struct BitWrapperFuncImpl using ResultType = UInt8; static constexpr const bool allow_string_or_fixed_string = false; - static inline ResultType NO_SANITIZE_UNDEFINED apply(A a [[maybe_unused]]) + static ResultType NO_SANITIZE_UNDEFINED apply(A a [[maybe_unused]]) { // Should be a logical error, but this function is callable from SQL. // Need to investigate this. 
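The bitTestAll/bitTestAny hunks above differ only in the mask predicate of `apply`: `(a & b) == b` requires every bit of `b` to be set in `a`, while `(a & b) != 0` requires at least one. A small self-contained sketch of that distinction (illustrative only, not taken from the patch):

#include <cassert>
#include <cstdint>

static uint8_t testAll(uint64_t a, uint64_t b) { return (a & b) == b; } // all bits of b set in a
static uint8_t testAny(uint64_t a, uint64_t b) { return (a & b) != 0; } // at least one bit of b set in a

int main()
{
    assert(testAll(0b1011, 0b0011) == 1);
    assert(testAll(0b1011, 0b0110) == 0); // bit 2 of b is not set in a
    assert(testAny(0b1011, 0b0110) == 1);
    return 0;
}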
diff --git a/src/Functions/bitXor.cpp b/src/Functions/bitXor.cpp index 78c4c64d06e..43004c6f676 100644 --- a/src/Functions/bitXor.cpp +++ b/src/Functions/bitXor.cpp @@ -19,7 +19,7 @@ struct BitXorImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { return static_cast(a) ^ static_cast(b); } @@ -27,7 +27,7 @@ struct BitXorImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitXorImpl expected an integral type"); diff --git a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index 4d7a4f0b53d..c06dfe15dc4 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -214,7 +214,7 @@ private: template struct QuarterWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToQuarterImpl::execute(source, timezone), buffer); } @@ -223,7 +223,7 @@ private: template struct MonthWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { const auto month = ToMonthImpl::execute(source, timezone); static constexpr std::string_view month_names[] = @@ -249,7 +249,7 @@ private: template struct WeekWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToISOWeekImpl::execute(source, timezone), buffer); } @@ -258,7 +258,7 @@ private: template struct DayOfYearWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToDayOfYearImpl::execute(source, timezone), buffer); } @@ -267,7 +267,7 @@ private: template struct DayWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToDayOfMonthImpl::execute(source, timezone), buffer); } @@ -276,7 +276,7 @@ private: template struct WeekDayWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { const auto day = ToDayOfWeekImpl::execute(source, 0, timezone); static constexpr std::string_view day_names[] = @@ -297,7 +297,7 @@ private: template struct HourWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToHourImpl::execute(source, timezone), buffer); } @@ -306,7 +306,7 @@ private: template struct MinuteWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToMinuteImpl::execute(source, timezone), buffer); } @@ -315,7 +315,7 @@ private: template struct 
SecondWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToSecondImpl::execute(source, timezone), buffer); } diff --git a/src/Functions/divide.cpp b/src/Functions/divide.cpp index ca552256cd1..7c67245c382 100644 --- a/src/Functions/divide.cpp +++ b/src/Functions/divide.cpp @@ -16,7 +16,7 @@ struct DivideFloatingImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { return static_cast(a) / b; } @@ -24,7 +24,7 @@ struct DivideFloatingImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "DivideFloatingImpl expected a floating-point type"); diff --git a/src/Functions/divideDecimal.cpp b/src/Functions/divideDecimal.cpp index 1d0db232062..c8d2c5edc8a 100644 --- a/src/Functions/divideDecimal.cpp +++ b/src/Functions/divideDecimal.cpp @@ -18,7 +18,7 @@ struct DivideDecimalsImpl static constexpr auto name = "divideDecimal"; template - static inline Decimal256 + static Decimal256 execute(FirstType a, SecondType b, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) { if (b.value == 0) diff --git a/src/Functions/dynamicElement.cpp b/src/Functions/dynamicElement.cpp new file mode 100644 index 00000000000..202533dc5c8 --- /dev/null +++ b/src/Functions/dynamicElement.cpp @@ -0,0 +1,168 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/** Extract element of Dynamic by type name. + * Also the function looks through Arrays: you can get Array of Dynamic elements from Array of Dynamic. 
+ */ +class FunctionDynamicElement : public IFunction +{ +public: + static constexpr auto name = "dynamicElement"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + const size_t number_of_arguments = arguments.size(); + + if (number_of_arguments != 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2", + getName(), number_of_arguments); + + size_t count_arrays = 0; + const IDataType * input_type = arguments[0].type.get(); + while (const DataTypeArray * array = checkAndGetDataType(input_type)) + { + input_type = array->getNestedType().get(); + ++count_arrays; + } + + if (!isDynamic(*input_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Dynamic or Array of Dynamic. Actual {}", + getName(), + arguments[0].type->getName()); + + auto return_type = makeNullableOrLowCardinalityNullableSafe(getRequestedType(arguments[1].column)); + + for (; count_arrays; --count_arrays) + return_type = std::make_shared(return_type); + + return return_type; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & input_arg = arguments[0]; + const IDataType * input_type = input_arg.type.get(); + const IColumn * input_col = input_arg.column.get(); + + bool input_arg_is_const = false; + if (typeid_cast(input_col)) + { + input_col = assert_cast(input_col)->getDataColumnPtr().get(); + input_arg_is_const = true; + } + + Columns array_offsets; + while (const DataTypeArray * array_type = checkAndGetDataType(input_type)) + { + const ColumnArray * array_col = assert_cast(input_col); + + input_type = array_type->getNestedType().get(); + input_col = &array_col->getData(); + array_offsets.push_back(array_col->getOffsetsPtr()); + } + + const ColumnDynamic * input_col_as_dynamic = checkAndGetColumn(input_col); + const DataTypeDynamic * input_type_as_dynamic = checkAndGetDataType(input_type); + if (!input_col_as_dynamic || !input_type_as_dynamic) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Dynamic or array of Dynamics.
Actual {}", getName(), input_arg.type->getName()); + + auto type = getRequestedType(arguments[1].column); + auto subcolumn = input_type_as_dynamic->getSubcolumn(type->getName(), input_col_as_dynamic->getPtr()); + return wrapInArraysAndConstIfNeeded(std::move(subcolumn), array_offsets, input_arg_is_const, input_rows_count); + } + +private: + DataTypePtr getRequestedType(const ColumnPtr & type_name_column) const + { + const auto * name_col = checkAndGetColumnConst(type_name_column.get()); + if (!name_col) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument of {} must be a constant String", getName()); + + String element_type_name = name_col->getValue(); + auto element_type = DataTypeFactory::instance().tryGet(element_type_name); + if (!element_type) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument of {} must be a valid type name. Got: {}", getName(), element_type_name); + + return element_type; + } + + ColumnPtr wrapInArraysAndConstIfNeeded(ColumnPtr res, const Columns & array_offsets, bool input_arg_is_const, size_t input_rows_count) const + { + for (auto it = array_offsets.rbegin(); it != array_offsets.rend(); ++it) + res = ColumnArray::create(res, *it); + + if (input_arg_is_const) + res = ColumnConst::create(res, input_rows_count); + + return res; + } +}; + +} + +REGISTER_FUNCTION(DynamicElement) +{ + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Extracts a column with specified type from a `Dynamic` column. +)", + .syntax{"dynamicElement(dynamic, type_name)"}, + .arguments{ + {"dynamic", "Dynamic column"}, + {"type_name", "The name of the variant type to extract"}}, + .examples{{{ + "Example", + R"( +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT d, dynamicType(d), dynamicElement(d, 'String'), dynamicElement(d, 'Int64'), dynamicElement(d, 'Array(Int64)'), dynamicElement(d, 'Date'), dynamicElement(d, 'Array(String)') FROM test;)", + R"( +┌─d─────────────┬─dynamicType(d)─┬─dynamicElement(d, 'String')─┬─dynamicElement(d, 'Int64')─┬─dynamicElement(d, 'Array(Int64)')─┬─dynamicElement(d, 'Date')─┬─dynamicElement(d, 'Array(String)')─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ Int64 │ ᴺᵁᴸᴸ │ 42 │ [] │ ᴺᵁᴸᴸ │ [] │ +│ Hello, World! │ String │ Hello, World! │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ ᴺᵁᴸᴸ │ [] │ +└───────────────┴────────────────┴─────────────────────────────┴────────────────────────────┴───────────────────────────────────┴───────────────────────────┴────────────────────────────────────┘ +)"}}}, + .categories{"Dynamic"}, + }); +} + +} diff --git a/src/Functions/dynamicType.cpp b/src/Functions/dynamicType.cpp new file mode 100644 index 00000000000..e8ca73597d6 --- /dev/null +++ b/src/Functions/dynamicType.cpp @@ -0,0 +1,112 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +namespace +{ + +/// Return String with type name for each row in Dynamic column. 
+class FunctionDynamicType : public IFunction +{ +public: + static constexpr auto name = "dynamicType"; + static constexpr auto name_for_null = "None"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.empty() || arguments.size() > 1) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1", + getName(), arguments.size()); + + if (!isDynamic(arguments[0].type.get())) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Dynamic, got {} instead", + getName(), arguments[0].type->getName()); + + return std::make_shared(std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + const ColumnDynamic * dynamic_column = checkAndGetColumn(arguments[0].column.get()); + if (!dynamic_column) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Dynamic, got {} instead", + getName(), arguments[0].type->getName()); + + const auto & variant_info = dynamic_column->getVariantInfo(); + const auto & variant_column = dynamic_column->getVariantColumn(); + auto res = result_type->createColumn(); + String element_type; + for (size_t i = 0; i != input_rows_count; ++i) + { + auto global_discr = variant_column.globalDiscriminatorAt(i); + if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) + element_type = name_for_null; + else + element_type = variant_info.variant_names[global_discr]; + + res->insertData(element_type.data(), element_type.size()); + } + + return res; + } +}; + +} + +REGISTER_FUNCTION(DynamicType) +{ + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Returns the variant type name for each row of a `Dynamic` column. If the row contains NULL, it returns 'None'. +)", + .syntax = {"dynamicType(dynamic)"}, + .arguments = {{"dynamic", "Dynamic column"}}, + .examples = {{{ + "Example", + R"( +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT d, dynamicType(d) FROM test; +)", + R"( +┌─d─────────────┬─dynamicType(d)─┐ +│ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ +│ Hello, World!
│ String │ +│ [1,2,3] │ Array(Int64) │ +└───────────────┴────────────────┘ +)"}}}, + .categories{"Variant"}, + }); +} + +} diff --git a/src/Functions/factorial.cpp b/src/Functions/factorial.cpp index b814e8198e6..7ff9126c004 100644 --- a/src/Functions/factorial.cpp +++ b/src/Functions/factorial.cpp @@ -19,7 +19,7 @@ struct FactorialImpl static const constexpr bool allow_decimal = false; static const constexpr bool allow_string_or_fixed_string = false; - static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + static NO_SANITIZE_UNDEFINED ResultType apply(A a) { if constexpr (std::is_floating_point_v || is_over_big_int) throw Exception( diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp new file mode 100644 index 00000000000..c3f7701a05a --- /dev/null +++ b/src/Functions/generateSnowflakeID.cpp @@ -0,0 +1,255 @@ +#include +#include +#include +#include +#include +#include +#include +#include "base/types.h" + + +namespace DB +{ + +namespace +{ + +/* Snowflake ID + https://en.wikipedia.org/wiki/Snowflake_ID + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +|0| timestamp | +├─┼ ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| | machine_id | machine_seq_num | +└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ + +- The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970) +- The middle 10 bits are the machine ID +- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by different processes +*/ + +/// bit counts +constexpr auto timestamp_bits_count = 41; +constexpr auto machine_id_bits_count = 10; +constexpr auto machine_seq_num_bits_count = 12; + +/// bits masks for Snowflake ID components +constexpr uint64_t machine_id_mask = ((1ull << machine_id_bits_count) - 1) << machine_seq_num_bits_count; +constexpr uint64_t machine_seq_num_mask = (1ull << machine_seq_num_bits_count) - 1; + +/// max values +constexpr uint64_t max_machine_seq_num = machine_seq_num_mask; + +uint64_t getTimestamp() +{ + auto now = std::chrono::system_clock::now(); + auto ticks_since_epoch = std::chrono::duration_cast(now.time_since_epoch()).count(); + return static_cast(ticks_since_epoch) & ((1ull << timestamp_bits_count) - 1); +} + +uint64_t getMachineIdImpl() +{ + UUID server_uuid = ServerUUID::get(); + /// hash into 64 bits + uint64_t hi = UUIDHelpers::getHighBytes(server_uuid); + uint64_t lo = UUIDHelpers::getLowBytes(server_uuid); + /// return only 10 bits + return (((hi * 11) ^ (lo * 17)) & machine_id_mask) >> machine_seq_num_bits_count; +} + +uint64_t getMachineId() +{ + static uint64_t machine_id = getMachineIdImpl(); + return machine_id; +} + +struct SnowflakeId +{ + uint64_t timestamp; + uint64_t machine_id; + uint64_t machine_seq_num; +}; + +SnowflakeId toSnowflakeId(uint64_t snowflake) +{ + return {.timestamp = (snowflake >> (machine_id_bits_count + machine_seq_num_bits_count)), + .machine_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count), + .machine_seq_num = (snowflake & machine_seq_num_mask)}; +} + +uint64_t fromSnowflakeId(SnowflakeId components) +{ + return (components.timestamp << (machine_id_bits_count + machine_seq_num_bits_count) | + components.machine_id << (machine_seq_num_bits_count) | + components.machine_seq_num); +} + +struct SnowflakeIdRange +{ + SnowflakeId begin; /// inclusive + SnowflakeId end; /// exclusive +}; + +/// To get the range 
of `input_rows_count` Snowflake IDs from `max(available, now)`: +/// 1. calculate Snowflake ID by current timestamp (`now`) +/// 2. `begin = max(available, now)` +/// 3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow +SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t input_rows_count) +{ + /// 1. `now` + SnowflakeId begin = {.timestamp = getTimestamp(), .machine_id = getMachineId(), .machine_seq_num = 0}; + + /// 2. `begin` + if (begin.timestamp <= available.timestamp) + { + begin.timestamp = available.timestamp; + begin.machine_seq_num = available.machine_seq_num; + } + + /// 3. `end = begin + input_rows_count` + SnowflakeId end; + const uint64_t seq_nums_in_current_timestamp_left = (max_machine_seq_num - begin.machine_seq_num + 1); + if (input_rows_count >= seq_nums_in_current_timestamp_left) + /// if sequence numbers in current timestamp is not enough for rows --> depending on how many elements input_rows_count overflows, forward timestamp by at least 1 tick + end.timestamp = begin.timestamp + 1 + (input_rows_count - seq_nums_in_current_timestamp_left) / (max_machine_seq_num + 1); + else + end.timestamp = begin.timestamp; + + end.machine_id = begin.machine_id; + end.machine_seq_num = (begin.machine_seq_num + input_rows_count) & machine_seq_num_mask; + + return {begin, end}; +} + +struct GlobalCounterPolicy +{ + static constexpr auto name = "generateSnowflakeID"; + static constexpr auto description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; + + /// Guarantee counter monotonicity within one timestamp across all threads generating Snowflake IDs simultaneously. + struct Data + { + static inline std::atomic lowest_available_snowflake_id = 0; + + SnowflakeId reserveRange(size_t input_rows_count) + { + uint64_t available_snowflake_id = lowest_available_snowflake_id.load(); + SnowflakeIdRange range; + do + { + range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), input_rows_count); + } + while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, fromSnowflakeId(range.end))); + /// if CAS failed --> another thread updated `lowest_available_snowflake_id` and we re-try + /// else --> our thread reserved ID range [begin, end) and return the beginning of the range + + return range.begin; + } + }; +}; + +struct ThreadLocalCounterPolicy +{ + static constexpr auto name = "generateSnowflakeIDThreadMonotonic"; + static constexpr auto description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. 
In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. This function behaves like generateSnowflakeID but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.)"; + + /// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads. + struct Data + { + static inline thread_local uint64_t lowest_available_snowflake_id = 0; + + SnowflakeId reserveRange(size_t input_rows_count) + { + SnowflakeIdRange range = getRangeOfAvailableIds(toSnowflakeId(lowest_available_snowflake_id), input_rows_count); + lowest_available_snowflake_id = fromSnowflakeId(range.end); + return range.begin; + } + }; +}; + +} + +template +class FunctionGenerateSnowflakeID : public IFunction, public FillPolicy +{ +public: + static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared(); } + + String getName() const override { return FillPolicy::name; } + size_t getNumberOfArguments() const override { return 0; } + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return false; } + bool useDefaultImplementationForNulls() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool isVariadic() const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors mandatory_args; + FunctionArgumentDescriptors optional_args{ + {"expr", nullptr, nullptr, "Arbitrary expression"} + }; + validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override + { + auto col_res = ColumnVector::create(); + typename ColumnVector::Container & vec_to = col_res->getData(); + + if (input_rows_count != 0) + { + vec_to.resize(input_rows_count); + + typename FillPolicy::Data data; + SnowflakeId snowflake_id = data.reserveRange(input_rows_count); /// returns begin of available snowflake ids range + + for (UInt64 & to_row : vec_to) + { + to_row = fromSnowflakeId(snowflake_id); + if (snowflake_id.machine_seq_num == max_machine_seq_num) + { + /// handle overflow + snowflake_id.machine_seq_num = 0; + ++snowflake_id.timestamp; + } + else + { + ++snowflake_id.machine_seq_num; + } + } + } + + return col_res; + } + +}; + +template +void registerSnowflakeIDGenerator(auto & factory) +{ + static constexpr auto doc_syntax_format = "{}([expression])"; + static constexpr auto example_format = "SELECT {}()"; + static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)"; + + FunctionDocumentation::Description description = FillPolicy::description; + FunctionDocumentation::Syntax syntax = fmt::format(doc_syntax_format, FillPolicy::name); + FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. 
Optional."}}; + FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64"; + FunctionDocumentation::Examples examples = {{"single", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; + + factory.template registerFunction>({description, syntax, arguments, returned_value, examples, categories}, FunctionFactory::CaseInsensitive); +} + +REGISTER_FUNCTION(GenerateSnowflakeID) +{ + registerSnowflakeIDGenerator(factory); + registerSnowflakeIDGenerator(factory); +} + +} diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp index 411a3a076ac..f2a82431c0a 100644 --- a/src/Functions/generateUUIDv7.cpp +++ b/src/Functions/generateUUIDv7.cpp @@ -76,7 +76,7 @@ void setVariant(UUID & uuid) struct FillAllRandomPolicy { static constexpr auto name = "generateUUIDv7NonMonotonic"; - static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)"; + static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)"; struct Data { void generate(UUID & uuid, uint64_t ts) @@ -136,7 +136,7 @@ struct CounterFields struct GlobalCounterPolicy { static constexpr auto name = "generateUUIDv7"; - static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; + static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. 
Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; /// Guarantee counter monotonicity within one timestamp across all threads generating UUIDv7 simultaneously. struct Data @@ -159,7 +159,7 @@ struct GlobalCounterPolicy struct ThreadLocalCounterPolicy { static constexpr auto name = "generateUUIDv7ThreadMonotonic"; - static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)"; + static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)"; /// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads. 
struct Data @@ -186,7 +186,6 @@ class FunctionGenerateUUIDv7Base : public IFunction, public FillPolicy { public: String getName() const final { return FillPolicy::name; } - size_t getNumberOfArguments() const final { return 0; } bool isDeterministic() const override { return false; } bool isDeterministicInScopeOfQuery() const final { return false; } @@ -198,7 +197,7 @@ public: { FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ - {"expr", nullptr, nullptr, "Arbitrary Expression"} + {"expr", nullptr, nullptr, "Arbitrary expression"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -264,20 +263,20 @@ private: }; template -void registerUUIDv7Generator(auto& factory) +void registerUUIDv7Generator(auto & factory) { static constexpr auto doc_syntax_format = "{}([expression])"; static constexpr auto example_format = "SELECT {}()"; static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)"; - FunctionDocumentation::Description doc_description = FillPolicy::doc_description; - FunctionDocumentation::Syntax doc_syntax = fmt::format(doc_syntax_format, FillPolicy::name); - FunctionDocumentation::Arguments doc_arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}}; - FunctionDocumentation::ReturnedValue doc_returned_value = "A value of type UUID version 7."; - FunctionDocumentation::Examples doc_examples = {{"uuid", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; - FunctionDocumentation::Categories doc_categories = {"UUID"}; + FunctionDocumentation::Description description = FillPolicy::description; + FunctionDocumentation::Syntax syntax = fmt::format(doc_syntax_format, FillPolicy::name); + FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}}; + FunctionDocumentation::ReturnedValue returned_value = "A value of type UUID version 7."; + FunctionDocumentation::Examples examples = {{"single", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; + FunctionDocumentation::Categories categories = {"UUID"}; - factory.template registerFunction>({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive); + factory.template registerFunction>({description, syntax, arguments, returned_value, examples, categories}, FunctionFactory::CaseInsensitive); } REGISTER_FUNCTION(GenerateUUIDv7) diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index 1c12317f510..1bd71f19f76 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -94,13 +94,13 @@ struct Impl } } - static inline NO_SANITIZE_UNDEFINED size_t toIndex(T x) + static NO_SANITIZE_UNDEFINED size_t toIndex(T x) { /// Implementation specific behaviour on overflow or infinite value. 
return static_cast(x); } - static inline T degDiff(T f) + static T degDiff(T f) { f = std::abs(f); if (f > 180) @@ -108,7 +108,7 @@ struct Impl return f; } - inline T fastCos(T x) + T fastCos(T x) { T y = std::abs(x) * (T(COS_LUT_SIZE) / T(PI) / T(2.0)); size_t i = toIndex(y); @@ -117,7 +117,7 @@ struct Impl return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y; } - inline T fastSin(T x) + T fastSin(T x) { T y = std::abs(x) * (T(COS_LUT_SIZE) / T(PI) / T(2.0)); size_t i = toIndex(y); @@ -128,7 +128,7 @@ struct Impl /// fast implementation of asin(sqrt(x)) /// max error in floats 0.00369%, in doubles 0.00072% - inline T fastAsinSqrt(T x) + T fastAsinSqrt(T x) { if (x < T(0.122)) { diff --git a/src/Functions/greatest.cpp b/src/Functions/greatest.cpp index 93fd7e24853..87a48c887b4 100644 --- a/src/Functions/greatest.cpp +++ b/src/Functions/greatest.cpp @@ -15,7 +15,7 @@ struct GreatestBaseImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { return static_cast(a) > static_cast(b) ? static_cast(a) : static_cast(b); @@ -24,7 +24,7 @@ struct GreatestBaseImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) { if (!left->getType()->isIntegerTy()) { @@ -46,7 +46,7 @@ struct GreatestSpecialImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { static_assert(std::is_same_v, "ResultType != Result"); return accurate::greaterOp(a, b) ? static_cast(a) : static_cast(b); diff --git a/src/Functions/h3GetUnidirectionalEdge.cpp b/src/Functions/h3GetUnidirectionalEdge.cpp index 4e41cdbfef6..9e253e87104 100644 --- a/src/Functions/h3GetUnidirectionalEdge.cpp +++ b/src/Functions/h3GetUnidirectionalEdge.cpp @@ -108,7 +108,7 @@ public: /// suppress asan errors generated by the following: /// 'NEW_ADJUSTMENT_III' defined in '../contrib/h3/src/h3lib/lib/algos.c:142:24 /// 'NEW_DIGIT_III' defined in '../contrib/h3/src/h3lib/lib/algos.c:121:24 - __attribute__((no_sanitize_address)) static inline UInt64 getUnidirectionalEdge(const UInt64 origin, const UInt64 dest) + __attribute__((no_sanitize_address)) static UInt64 getUnidirectionalEdge(const UInt64 origin, const UInt64 dest) { const UInt64 res = cellsToDirectedEdge(origin, dest); return res; diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index abd3f036408..7a6d37d810d 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -1157,6 +1158,11 @@ private: variant_column->applyNullMap(assert_cast(*arg_cond.column).getData()); return result_column; } + else if (auto * dynamic_column = typeid_cast(result_column.get())) + { + dynamic_column->applyNullMap(assert_cast(*arg_cond.column).getData()); + return result_column; + } else return ColumnNullable::create(materializeColumnIfConst(result_column), arg_cond.column); } @@ -1200,6 +1206,11 @@ private: variant_column->applyNegatedNullMap(assert_cast(*arg_cond.column).getData()); return result_column; } + else if (auto * dynamic_column = typeid_cast(result_column.get())) + { + dynamic_column->applyNegatedNullMap(assert_cast(*arg_cond.column).getData()); + return result_column; + } else { size_t size = 
input_rows_count; diff --git a/src/Functions/initialQueryID.cpp b/src/Functions/initialQueryID.cpp index 469f37cf614..9c9390d4e50 100644 --- a/src/Functions/initialQueryID.cpp +++ b/src/Functions/initialQueryID.cpp @@ -19,16 +19,16 @@ public: explicit FunctionInitialQueryID(const String & initial_query_id_) : initial_query_id(initial_query_id_) {} - inline String getName() const override { return name; } + String getName() const override { return name; } - inline size_t getNumberOfArguments() const override { return 0; } + size_t getNumberOfArguments() const override { return 0; } DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return std::make_shared(); } - inline bool isDeterministic() const override { return false; } + bool isDeterministic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/intDiv.cpp b/src/Functions/intDiv.cpp index 38939556fa5..6b5bb00eacd 100644 --- a/src/Functions/intDiv.cpp +++ b/src/Functions/intDiv.cpp @@ -80,7 +80,7 @@ struct DivideIntegralByConstantImpl private: template - static inline void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) + static void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) { if constexpr (op_case == OpCase::Vector) c[i] = Op::template apply(a[i], b[i]); diff --git a/src/Functions/intDivOrZero.cpp b/src/Functions/intDivOrZero.cpp index 96ff6ea80fc..f32eac17127 100644 --- a/src/Functions/intDivOrZero.cpp +++ b/src/Functions/intDivOrZero.cpp @@ -13,7 +13,7 @@ struct DivideIntegralOrZeroImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { if (unlikely(divisionLeadsToFPE(a, b))) return 0; diff --git a/src/Functions/intExp10.cpp b/src/Functions/intExp10.cpp index 6944c4701bc..733f9d55702 100644 --- a/src/Functions/intExp10.cpp +++ b/src/Functions/intExp10.cpp @@ -19,7 +19,7 @@ struct IntExp10Impl using ResultType = UInt64; static constexpr const bool allow_string_or_fixed_string = false; - static inline ResultType apply([[maybe_unused]] A a) + static ResultType apply([[maybe_unused]] A a) { if constexpr (is_big_int_v || std::is_same_v) throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "IntExp10 is not implemented for big integers"); diff --git a/src/Functions/intExp2.cpp b/src/Functions/intExp2.cpp index 4e5cc60a731..7e016a0dbd2 100644 --- a/src/Functions/intExp2.cpp +++ b/src/Functions/intExp2.cpp @@ -20,7 +20,7 @@ struct IntExp2Impl using ResultType = UInt64; static constexpr bool allow_string_or_fixed_string = false; - static inline ResultType apply([[maybe_unused]] A a) + static ResultType apply([[maybe_unused]] A a) { if constexpr (is_big_int_v) throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "intExp2 not implemented for big integers"); @@ -31,7 +31,7 @@ struct IntExp2Impl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { if (!arg->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "IntExp2Impl expected an integral type"); diff --git a/src/Functions/isNotNull.cpp b/src/Functions/isNotNull.cpp index dd53c700221..ea95a5c2b1c 100644 --- a/src/Functions/isNotNull.cpp +++ b/src/Functions/isNotNull.cpp 
@@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -44,9 +45,10 @@ public: { const ColumnWithTypeAndName & elem = arguments[0]; - if (isVariant(elem.type)) + if (isVariant(elem.type) || isDynamic(elem.type)) { - const auto & discriminators = checkAndGetColumn(*elem.column).getLocalDiscriminators(); + const auto & column_variant = isVariant(elem.type) ? checkAndGetColumn(*elem.column) : checkAndGetColumn(*elem.column).getVariantColumn(); + const auto & discriminators = column_variant.getLocalDiscriminators(); auto res = DataTypeUInt8().createColumn(); auto & data = typeid_cast(*res).getData(); data.resize(discriminators.size()); diff --git a/src/Functions/isNull.cpp b/src/Functions/isNull.cpp index 7a6dabab7af..a98ff2ab8e8 100644 --- a/src/Functions/isNull.cpp +++ b/src/Functions/isNull.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -46,9 +47,10 @@ public: { const ColumnWithTypeAndName & elem = arguments[0]; - if (isVariant(elem.type)) + if (isVariant(elem.type) || isDynamic(elem.type)) { - const auto & discriminators = checkAndGetColumn(*elem.column).getLocalDiscriminators(); + const auto & column_variant = isVariant(elem.type) ? checkAndGetColumn(*elem.column) : checkAndGetColumn(*elem.column).getVariantColumn(); + const auto & discriminators = column_variant.getLocalDiscriminators(); auto res = DataTypeUInt8().createColumn(); auto & data = typeid_cast(*res).getData(); data.reserve(discriminators.size()); diff --git a/src/Functions/isValidUTF8.cpp b/src/Functions/isValidUTF8.cpp index e7aba672356..d5f5e6a8986 100644 --- a/src/Functions/isValidUTF8.cpp +++ b/src/Functions/isValidUTF8.cpp @@ -65,9 +65,9 @@ SOFTWARE. */ #ifndef __SSE4_1__ - static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { return DB::UTF8::isValidUTF8(data, len); } + static UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { return DB::UTF8::isValidUTF8(data, len); } #else - static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len) + static UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { /* * Map high nibble of "First Byte" to legal character length minus 1 diff --git a/src/Functions/jumpConsistentHash.cpp b/src/Functions/jumpConsistentHash.cpp index ffc21eb5cea..fbac5d4fdd5 100644 --- a/src/Functions/jumpConsistentHash.cpp +++ b/src/Functions/jumpConsistentHash.cpp @@ -29,7 +29,7 @@ struct JumpConsistentHashImpl using BucketsType = ResultType; static constexpr auto max_buckets = static_cast(std::numeric_limits::max()); - static inline ResultType apply(UInt64 hash, BucketsType n) + static ResultType apply(UInt64 hash, BucketsType n) { return JumpConsistentHash(hash, n); } diff --git a/src/Functions/kostikConsistentHash.cpp b/src/Functions/kostikConsistentHash.cpp index 47a9a928976..42004ed40d9 100644 --- a/src/Functions/kostikConsistentHash.cpp +++ b/src/Functions/kostikConsistentHash.cpp @@ -17,7 +17,7 @@ struct KostikConsistentHashImpl using BucketsType = ResultType; static constexpr auto max_buckets = 32768; - static inline ResultType apply(UInt64 hash, BucketsType n) + static ResultType apply(UInt64 hash, BucketsType n) { return ConsistentHashing(hash, n); } diff --git a/src/Functions/least.cpp b/src/Functions/least.cpp index f5680d4d468..babb8378d80 100644 --- a/src/Functions/least.cpp +++ b/src/Functions/least.cpp @@ -15,7 +15,7 @@ struct LeastBaseImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { /** gcc 4.9.2 successfully vectorizes a loop 
from this function. */ return static_cast(a) < static_cast(b) ? static_cast(a) : static_cast(b); @@ -24,7 +24,7 @@ struct LeastBaseImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) { if (!left->getType()->isIntegerTy()) { @@ -46,7 +46,7 @@ struct LeastSpecialImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { static_assert(std::is_same_v, "ResultType != Result"); return accurate::lessOp(a, b) ? static_cast(a) : static_cast(b); diff --git a/src/Functions/minus.cpp b/src/Functions/minus.cpp index 04877a42b18..f3b9b8a7bcb 100644 --- a/src/Functions/minus.cpp +++ b/src/Functions/minus.cpp @@ -13,7 +13,7 @@ struct MinusImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { if constexpr (is_big_int_v || is_big_int_v) { @@ -28,7 +28,7 @@ struct MinusImpl /// Apply operation and check overflow. It's used for Deciamal operations. @returns true if overflowed, false otherwise. template - static inline bool apply(A a, B b, Result & c) + static bool apply(A a, B b, Result & c) { return common::subOverflow(static_cast(a), b, c); } @@ -36,7 +36,7 @@ struct MinusImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { return left->getType()->isIntegerTy() ? b.CreateSub(left, right) : b.CreateFSub(left, right); } diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index cbc2ec2cd0a..ebc1c4f5275 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -105,7 +105,7 @@ struct ModuloByConstantImpl private: template - static inline void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) + static void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) { if constexpr (op_case == OpCase::Vector) c[i] = Op::template apply(a[i], b[i]); diff --git a/src/Functions/moduloOrZero.cpp b/src/Functions/moduloOrZero.cpp index 3551ae74c5f..cd7873b3b9e 100644 --- a/src/Functions/moduloOrZero.cpp +++ b/src/Functions/moduloOrZero.cpp @@ -15,7 +15,7 @@ struct ModuloOrZeroImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { if constexpr (std::is_floating_point_v) { diff --git a/src/Functions/multiply.cpp b/src/Functions/multiply.cpp index 4dc8cd10f31..67b6fff6b58 100644 --- a/src/Functions/multiply.cpp +++ b/src/Functions/multiply.cpp @@ -14,7 +14,7 @@ struct MultiplyImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { if constexpr (is_big_int_v || is_big_int_v) { @@ -29,7 +29,7 @@ struct MultiplyImpl /// Apply operation and check overflow. It's used for Decimal operations. @returns true if overflowed, false otherwise. 
template - static inline bool apply(A a, B b, Result & c) + static bool apply(A a, B b, Result & c) { if constexpr (std::is_same_v || std::is_same_v) { @@ -43,7 +43,7 @@ struct MultiplyImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { return left->getType()->isIntegerTy() ? b.CreateMul(left, right) : b.CreateFMul(left, right); } diff --git a/src/Functions/multiplyDecimal.cpp b/src/Functions/multiplyDecimal.cpp index ed6487c6683..7e30a893d72 100644 --- a/src/Functions/multiplyDecimal.cpp +++ b/src/Functions/multiplyDecimal.cpp @@ -17,7 +17,7 @@ struct MultiplyDecimalsImpl static constexpr auto name = "multiplyDecimal"; template - static inline Decimal256 + static Decimal256 execute(FirstType a, SecondType b, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) { if (a.value == 0 || b.value == 0) diff --git a/src/Functions/negate.cpp b/src/Functions/negate.cpp index bd47780dea8..2c9b461274d 100644 --- a/src/Functions/negate.cpp +++ b/src/Functions/negate.cpp @@ -11,7 +11,7 @@ struct NegateImpl using ResultType = std::conditional_t, A, typename NumberTraits::ResultOfNegate::Type>; static constexpr const bool allow_string_or_fixed_string = false; - static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + static NO_SANITIZE_UNDEFINED ResultType apply(A a) { return -static_cast(a); } @@ -19,7 +19,7 @@ struct NegateImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { return arg->getType()->isIntegerTy() ? b.CreateNeg(arg) : b.CreateFNeg(arg); } diff --git a/src/Functions/plus.cpp b/src/Functions/plus.cpp index cd9cf6cec5c..ffb0fe2ade7 100644 --- a/src/Functions/plus.cpp +++ b/src/Functions/plus.cpp @@ -14,7 +14,7 @@ struct PlusImpl static const constexpr bool is_commutative = true; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { /// Next everywhere, static_cast - so that there is no wrong result in expressions of the form Int64 c = UInt32(a) * Int32(-1). if constexpr (is_big_int_v || is_big_int_v) @@ -30,7 +30,7 @@ struct PlusImpl /// Apply operation and check overflow. It's used for Deciamal operations. @returns true if overflowed, false otherwise. template - static inline bool apply(A a, B b, Result & c) + static bool apply(A a, B b, Result & c) { return common::addOverflow(static_cast(a), b, c); } @@ -38,7 +38,7 @@ struct PlusImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { return left->getType()->isIntegerTy() ? 
b.CreateAdd(left, right) : b.CreateFAdd(left, right); } diff --git a/src/Functions/queryID.cpp b/src/Functions/queryID.cpp index 704206e1de5..5d0ac719797 100644 --- a/src/Functions/queryID.cpp +++ b/src/Functions/queryID.cpp @@ -19,16 +19,16 @@ public: explicit FunctionQueryID(const String & query_id_) : query_id(query_id_) {} - inline String getName() const override { return name; } + String getName() const override { return name; } - inline size_t getNumberOfArguments() const override { return 0; } + size_t getNumberOfArguments() const override { return 0; } DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return std::make_shared(); } - inline bool isDeterministic() const override { return false; } + bool isDeterministic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index 84597f4eadc..7f2fe646062 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -22,14 +22,14 @@ namespace struct RepeatImpl { /// Safety threshold against DoS. - static inline void checkRepeatTime(UInt64 repeat_time) + static void checkRepeatTime(UInt64 repeat_time) { static constexpr UInt64 max_repeat_times = 1'000'000; if (repeat_time > max_repeat_times) throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too many times to repeat ({}), maximum is: {}", repeat_time, max_repeat_times); } - static inline void checkStringSize(UInt64 size) + static void checkStringSize(UInt64 size) { static constexpr UInt64 max_string_size = 1 << 30; if (size > max_string_size) diff --git a/src/Functions/roundAge.cpp b/src/Functions/roundAge.cpp index cca92c19b0c..38eda9f3383 100644 --- a/src/Functions/roundAge.cpp +++ b/src/Functions/roundAge.cpp @@ -12,7 +12,7 @@ struct RoundAgeImpl using ResultType = UInt8; static constexpr const bool allow_string_or_fixed_string = false; - static inline ResultType apply(A x) + static ResultType apply(A x) { return x < 1 ? 0 : (x < 18 ? 17 diff --git a/src/Functions/roundDuration.cpp b/src/Functions/roundDuration.cpp index 918f0b3425d..963080ba0d2 100644 --- a/src/Functions/roundDuration.cpp +++ b/src/Functions/roundDuration.cpp @@ -12,7 +12,7 @@ struct RoundDurationImpl using ResultType = UInt16; static constexpr bool allow_string_or_fixed_string = false; - static inline ResultType apply(A x) + static ResultType apply(A x) { return x < 1 ? 0 : (x < 10 ? 1 diff --git a/src/Functions/roundToExp2.cpp b/src/Functions/roundToExp2.cpp index 607c67b742e..eb0df8884c5 100644 --- a/src/Functions/roundToExp2.cpp +++ b/src/Functions/roundToExp2.cpp @@ -65,7 +65,7 @@ struct RoundToExp2Impl using ResultType = T; static constexpr const bool allow_string_or_fixed_string = false; - static inline T apply(T x) + static T apply(T x) { return roundDownToPowerOfTwo(x); } diff --git a/src/Functions/sign.cpp b/src/Functions/sign.cpp index 6c849760eed..3dd2ac8e3aa 100644 --- a/src/Functions/sign.cpp +++ b/src/Functions/sign.cpp @@ -11,7 +11,7 @@ struct SignImpl using ResultType = Int8; static constexpr bool allow_string_or_fixed_string = false; - static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + static NO_SANITIZE_UNDEFINED ResultType apply(A a) { if constexpr (is_decimal || std::is_floating_point_v) return a < A(0) ? -1 : a == A(0) ? 
0 : 1; diff --git a/src/Functions/space.cpp b/src/Functions/space.cpp index 4cfa629aa33..83183c991bc 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -27,7 +27,7 @@ private: static constexpr auto space = ' '; /// Safety threshold against DoS. - static inline void checkRepeatTime(size_t repeat_time) + static void checkRepeatTime(size_t repeat_time) { static constexpr auto max_repeat_times = 1'000'000uz; if (repeat_time > max_repeat_times) diff --git a/src/Functions/splitByRegexp.cpp b/src/Functions/splitByRegexp.cpp index 32afb813a04..042db97794d 100644 --- a/src/Functions/splitByRegexp.cpp +++ b/src/Functions/splitByRegexp.cpp @@ -1,9 +1,11 @@ #include +#include +#include #include #include -#include #include #include +#include #include @@ -102,7 +104,7 @@ public: return false; } - pos += 1; + ++pos; token_end = pos; ++splits; } @@ -148,11 +150,67 @@ public: using FunctionSplitByRegexp = FunctionTokens; +/// Fallback splitByRegexp to splitByChar when its 1st argument is a trivial char for better performance +class SplitByRegexpOverloadResolver : public IFunctionOverloadResolver +{ +public: + static constexpr auto name = "splitByRegexp"; + static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique(context); } + + explicit SplitByRegexpOverloadResolver(ContextPtr context_) + : context(context_) + , split_by_regexp(FunctionSplitByRegexp::create(context)) {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return SplitByRegexpImpl::getNumberOfArguments(); } + bool isVariadic() const override { return SplitByRegexpImpl::isVariadic(); } + /// ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return SplitByRegexpImpl::getArgumentsThatAreAlwaysConstant(); } + + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override + { + if (patternIsTrivialChar(arguments)) + return FunctionFactory::instance().getImpl("splitByChar", context)->build(arguments); + else + return std::make_unique( + split_by_regexp, collections::map(arguments, [](const auto & elem) { return elem.type; }), return_type); + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + return split_by_regexp->getReturnTypeImpl(arguments); + } + +private: + bool patternIsTrivialChar(const ColumnsWithTypeAndName & arguments) const + { + if (!arguments[0].column.get()) + return false; + const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get()); + if (!col) + return false; + + String pattern = col->getValue(); + if (pattern.size() == 1) + { + OptimizedRegularExpression re = Regexps::createRegexp(pattern); + + std::string required_substring; + bool is_trivial; + bool required_substring_is_prefix; + re.getAnalyzeResult(required_substring, is_trivial, required_substring_is_prefix); + return is_trivial && required_substring == pattern; + } + return false; + } + + ContextPtr context; + FunctionPtr split_by_regexp; +}; } REGISTER_FUNCTION(SplitByRegexp) { - factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/tokenExtractors.cpp b/src/Functions/tokenExtractors.cpp index a29d759d2ca..e7dcb5cced3 100644 --- a/src/Functions/tokenExtractors.cpp +++ b/src/Functions/tokenExtractors.cpp @@ -116,7 +116,7 @@ public: private: template - inline void executeImpl( + void executeImpl( const ExtractorType & extractor, StringColumnType & input_data_column, ResultStringColumnType 
& result_data_column, diff --git a/src/Functions/variantElement.cpp b/src/Functions/variantElement.cpp index 2744a0dabb8..80d34083d9d 100644 --- a/src/Functions/variantElement.cpp +++ b/src/Functions/variantElement.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -111,61 +112,15 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Variant or array of Variants. Actual {}", getName(), input_arg.type->getName()); - std::optional variant_global_discr = getVariantGlobalDiscriminator(arguments[1].column, *input_type_as_variant, arguments.size()); + auto variant_discr = getVariantGlobalDiscriminator(arguments[1].column, *input_type_as_variant, arguments.size()); - if (!variant_global_discr.has_value()) + if (!variant_discr) return arguments[2].column; - const auto & variant_type = input_type_as_variant->getVariant(*variant_global_discr); - const auto & variant_column = input_col_as_variant->getVariantPtrByGlobalDiscriminator(*variant_global_discr); - - /// If Variant has only NULLs or our variant doesn't have any real values, - /// just create column with default values and create null mask with 1. - if (input_col_as_variant->hasOnlyNulls() || variant_column->empty()) - { - auto res = variant_type->createColumn(); - - if (variant_type->lowCardinality()) - assert_cast(*res).nestedToNullable(); - - res->insertManyDefaults(input_col_as_variant->size()); - if (!variant_type->canBeInsideNullable()) - return wrapInArraysAndConstIfNeeded(std::move(res), array_offsets, input_arg_is_const, input_rows_count); - - auto null_map = ColumnUInt8::create(); - auto & null_map_data = null_map->getData(); - null_map_data.resize_fill(input_col_as_variant->size(), 1); - return wrapInArraysAndConstIfNeeded(ColumnNullable::create(std::move(res), std::move(null_map)), array_offsets, input_arg_is_const, input_rows_count); - } - - /// If we extract single non-empty column and have no NULLs, then just return this variant. - if (auto non_empty_local_discr = input_col_as_variant->getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) - { - /// If we were trying to extract some other variant, - /// it would be empty and we would already processed this case above. 
- chassert(input_col_as_variant->globalDiscriminatorByLocal(*non_empty_local_discr) == variant_global_discr); - return wrapInArraysAndConstIfNeeded(makeNullableOrLowCardinalityNullableSafe(variant_column), array_offsets, input_arg_is_const, input_rows_count); - } - - /// In general case we should calculate null-mask for variant - /// according to the discriminators column and expand - /// variant column by this mask to get a full column (with default values on NULLs) - const auto & local_discriminators = input_col_as_variant->getLocalDiscriminators(); - auto null_map = ColumnUInt8::create(); - auto & null_map_data = null_map->getData(); - null_map_data.reserve(local_discriminators.size()); - auto variant_local_discr = input_col_as_variant->localDiscriminatorByGlobal(*variant_global_discr); - for (auto local_discr : local_discriminators) - null_map_data.push_back(local_discr != variant_local_discr); - - auto expanded_variant_column = IColumn::mutate(variant_column); - if (variant_type->lowCardinality()) - expanded_variant_column = assert_cast(*expanded_variant_column).cloneNullable(); - expanded_variant_column->expand(null_map_data, /*inverted = */ true); - if (variant_type->canBeInsideNullable()) - return wrapInArraysAndConstIfNeeded(ColumnNullable::create(std::move(expanded_variant_column), std::move(null_map)), array_offsets, input_arg_is_const, input_rows_count); - return wrapInArraysAndConstIfNeeded(std::move(expanded_variant_column), array_offsets, input_arg_is_const, input_rows_count); + auto variant_column = input_type_as_variant->getSubcolumn(input_type_as_variant->getVariant(*variant_discr)->getName(), input_col_as_variant->getPtr()); + return wrapInArraysAndConstIfNeeded(std::move(variant_column), array_offsets, input_arg_is_const, input_rows_count); } + private: std::optional getVariantGlobalDiscriminator(const ColumnPtr & index_column, const DataTypeVariant & variant_type, size_t argument_size) const { @@ -175,20 +130,16 @@ private: "Second argument to {} with Variant argument must be a constant String", getName()); - String variant_element_name = name_col->getValue(); - auto variant_element_type = DataTypeFactory::instance().tryGet(variant_element_name); - if (variant_element_type) + auto variant_element_name = name_col->getValue(); + if (auto variant_element_type = DataTypeFactory::instance().tryGet(variant_element_name)) { - const auto & variants = variant_type.getVariants(); - for (size_t i = 0; i != variants.size(); ++i) - { - if (variants[i]->getName() == variant_element_type->getName()) - return i; - } + if (auto discr = variant_type.tryGetVariantDiscriminator(variant_element_type->getName())) + return discr; } if (argument_size == 2) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} doesn't contain variant with type {}", variant_type.getName(), variant_element_name); + return std::nullopt; } @@ -213,10 +164,10 @@ REGISTER_FUNCTION(VariantElement) Extracts a column with specified type from a `Variant` column. )", .syntax{"variantElement(variant, type_name, [, default_value])"}, - .arguments{{ + .arguments{ {"variant", "Variant column"}, {"type_name", "The name of the variant type to extract"}, - {"default_value", "The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional"}}}, + {"default_value", "The default value that will be used if variant doesn't have variant with specified type. Can be any type. 
Optional"}}, .examples{{{ "Example", R"( diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 6ad54923ab5..9c20ee4cff0 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -4,8 +4,7 @@ #if USE_AZURE_BLOB_STORAGE -#include -#include +#include #include #include #include diff --git a/src/IO/BufferBase.h b/src/IO/BufferBase.h index e98f00270e2..62fe011c0b6 100644 --- a/src/IO/BufferBase.h +++ b/src/IO/BufferBase.h @@ -37,13 +37,13 @@ public: { Buffer(Position begin_pos_, Position end_pos_) : begin_pos(begin_pos_), end_pos(end_pos_) {} - inline Position begin() const { return begin_pos; } - inline Position end() const { return end_pos; } - inline size_t size() const { return size_t(end_pos - begin_pos); } - inline void resize(size_t size) { end_pos = begin_pos + size; } - inline bool empty() const { return size() == 0; } + Position begin() const { return begin_pos; } + Position end() const { return end_pos; } + size_t size() const { return size_t(end_pos - begin_pos); } + void resize(size_t size) { end_pos = begin_pos + size; } + bool empty() const { return size() == 0; } - inline void swap(Buffer & other) noexcept + void swap(Buffer & other) noexcept { std::swap(begin_pos, other.begin_pos); std::swap(end_pos, other.end_pos); @@ -71,21 +71,21 @@ public: } /// get buffer - inline Buffer & internalBuffer() { return internal_buffer; } + Buffer & internalBuffer() { return internal_buffer; } /// get the part of the buffer from which you can read / write data - inline Buffer & buffer() { return working_buffer; } + Buffer & buffer() { return working_buffer; } /// get (for reading and modifying) the position in the buffer - inline Position & position() { return pos; } + Position & position() { return pos; } /// offset in bytes of the cursor from the beginning of the buffer - inline size_t offset() const { return size_t(pos - working_buffer.begin()); } + size_t offset() const { return size_t(pos - working_buffer.begin()); } /// How many bytes are available for read/write - inline size_t available() const { return size_t(working_buffer.end() - pos); } + size_t available() const { return size_t(working_buffer.end() - pos); } - inline void swap(BufferBase & other) noexcept + void swap(BufferBase & other) noexcept { internal_buffer.swap(other.internal_buffer); working_buffer.swap(other.working_buffer); diff --git a/src/IO/HTTPHeaderEntries.h b/src/IO/HTTPHeaderEntries.h index 5862f1ead15..36b2ccc4ba5 100644 --- a/src/IO/HTTPHeaderEntries.h +++ b/src/IO/HTTPHeaderEntries.h @@ -10,7 +10,7 @@ struct HTTPHeaderEntry std::string value; HTTPHeaderEntry(const std::string & name_, const std::string & value_) : name(name_), value(value_) {} - inline bool operator==(const HTTPHeaderEntry & other) const { return name == other.name && value == other.value; } + bool operator==(const HTTPHeaderEntry & other) const { return name == other.name && value == other.value; } }; using HTTPHeaderEntries = std::vector; diff --git a/src/IO/HadoopSnappyReadBuffer.h b/src/IO/HadoopSnappyReadBuffer.h index 73e52f2c503..eba614d9d0a 100644 --- a/src/IO/HadoopSnappyReadBuffer.h +++ b/src/IO/HadoopSnappyReadBuffer.h @@ -37,7 +37,7 @@ public: Status readBlock(size_t * avail_in, const char ** next_in, size_t * avail_out, char ** next_out); - inline void reset() + void reset() { buffer_length = 0; block_length = -1; @@ -73,7 +73,7 @@ class HadoopSnappyReadBuffer : public CompressedReadBufferWrapper 
public: using Status = HadoopSnappyDecoder::Status; - inline static String statusToString(Status status) + static String statusToString(Status status) { switch (status) { diff --git a/src/IO/IReadableWriteBuffer.h b/src/IO/IReadableWriteBuffer.h index dda5fc07c8e..db379fef969 100644 --- a/src/IO/IReadableWriteBuffer.h +++ b/src/IO/IReadableWriteBuffer.h @@ -8,7 +8,7 @@ namespace DB struct IReadableWriteBuffer { /// At the first time returns getReadBufferImpl(). Next calls return nullptr. - inline std::unique_ptr tryGetReadBuffer() + std::unique_ptr tryGetReadBuffer() { if (!can_reread) return nullptr; diff --git a/src/IO/PeekableReadBuffer.h b/src/IO/PeekableReadBuffer.h index 2ee209ffd6c..e831956956f 100644 --- a/src/IO/PeekableReadBuffer.h +++ b/src/IO/PeekableReadBuffer.h @@ -83,9 +83,9 @@ private: bool peekNext(); - inline bool useSubbufferOnly() const { return !peeked_size; } - inline bool currentlyReadFromOwnMemory() const { return working_buffer.begin() != sub_buf->buffer().begin(); } - inline bool checkpointInOwnMemory() const { return checkpoint_in_own_memory; } + bool useSubbufferOnly() const { return !peeked_size; } + bool currentlyReadFromOwnMemory() const { return working_buffer.begin() != sub_buf->buffer().begin(); } + bool checkpointInOwnMemory() const { return checkpoint_in_own_memory; } void checkStateCorrect() const; diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h index 056e25a5fbe..73f5335411f 100644 --- a/src/IO/ReadBuffer.h +++ b/src/IO/ReadBuffer.h @@ -85,7 +85,7 @@ public: } - inline void nextIfAtEnd() + void nextIfAtEnd() { if (!hasPendingData()) next(); diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index b428b1c7d8a..c771fced73a 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -352,7 +352,6 @@ static ReturnType parseComplexEscapeSequence(Vector & s, ReadBuffer & buf) { return error("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE); } - s.push_back(unhex2(hex_code)); } else if (char_after_backslash == 'N') @@ -608,13 +607,20 @@ static ReturnType parseJSONEscapeSequence(Vector & s, ReadBuffer & buf, bool kee } -template +template void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) { while (!buf.eof()) { - char * next_pos = find_first_symbols<'\t', '\n', '\\'>(buf.position(), buf.buffer().end()); - + char * next_pos; + if constexpr (support_crlf) + { + next_pos = find_first_symbols<'\t', '\n', '\\','\r'>(buf.position(), buf.buffer().end()); + } + else + { + next_pos = find_first_symbols<'\t', '\n', '\\'>(buf.position(), buf.buffer().end()); + } appendToStringOrVector(s, buf, next_pos); buf.position() = next_pos; @@ -641,25 +647,46 @@ void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) } } } + + if constexpr (support_crlf) + { + if (*buf.position() == '\r') + { + ++buf.position(); + if (!buf.eof() && *buf.position() != '\n') + { + s.push_back('\r'); + continue; + } + return; + } + } } } -template +template void readEscapedStringInto(Vector & s, ReadBuffer & buf) { - readEscapedStringIntoImpl(s, buf); + readEscapedStringIntoImpl(s, buf); } void readEscapedString(String & s, ReadBuffer & buf) { s.clear(); - readEscapedStringInto(s, buf); + readEscapedStringInto(s, buf); } -template void readEscapedStringInto>(PaddedPODArray & s, ReadBuffer & buf); -template void readEscapedStringInto(NullOutput & s, ReadBuffer & buf); +void readEscapedStringCRLF(String & s, ReadBuffer & buf) +{ + s.clear(); + readEscapedStringInto(s, buf); +} +template void readEscapedStringInto,false>(PaddedPODArray & s, 
ReadBuffer & buf); +template void readEscapedStringInto(NullOutput & s, ReadBuffer & buf); +template void readEscapedStringInto,true>(PaddedPODArray & s, ReadBuffer & buf); +template void readEscapedStringInto(NullOutput & s, ReadBuffer & buf); /** If enable_sql_style_quoting == true, * strings like 'abc''def' will be parsed as abc'def. @@ -2069,7 +2096,14 @@ bool tryReadJSONField(String & s, ReadBuffer & buf, const FormatSettings::JSON & void readTSVField(String & s, ReadBuffer & buf) { s.clear(); - readEscapedStringIntoImpl(s, buf); + readEscapedStringIntoImpl(s, buf); } +void readTSVFieldCRLF(String & s, ReadBuffer & buf) +{ + s.clear(); + readEscapedStringIntoImpl(s, buf); +} + + } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 63bfae513e7..ffba4fafb5c 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -583,6 +583,8 @@ void readString(String & s, ReadBuffer & buf); void readEscapedString(String & s, ReadBuffer & buf); +void readEscapedStringCRLF(String & s, ReadBuffer & buf); + void readQuotedString(String & s, ReadBuffer & buf); void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf); @@ -645,7 +647,7 @@ void readStringInto(Vector & s, ReadBuffer & buf); template void readNullTerminated(Vector & s, ReadBuffer & buf); -template +template void readEscapedStringInto(Vector & s, ReadBuffer & buf); template @@ -1901,6 +1903,7 @@ void readJSONField(String & s, ReadBuffer & buf, const FormatSettings::JSON & se bool tryReadJSONField(String & s, ReadBuffer & buf, const FormatSettings::JSON & settings); void readTSVField(String & s, ReadBuffer & buf); +void readTSVFieldCRLF(String & s, ReadBuffer & buf); /** Parse the escape sequence, which can be simple (one character after backslash) or more complex (multiple characters). * It is assumed that the cursor is located on the `\` symbol diff --git a/src/IO/S3/Requests.h b/src/IO/S3/Requests.h index 424cf65caf2..3b03356a8fb 100644 --- a/src/IO/S3/Requests.h +++ b/src/IO/S3/Requests.h @@ -169,7 +169,7 @@ using DeleteObjectsRequest = ExtendedRequest; class ComposeObjectRequest : public ExtendedRequest { public: - inline const char * GetServiceRequestName() const override { return "ComposeObject"; } + const char * GetServiceRequestName() const override { return "ComposeObject"; } AWS_S3_API Aws::String SerializePayload() const override; diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index c52e6bc1441..363f98c46f5 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -29,6 +29,7 @@ struct URI std::string key; std::string version_id; std::string storage_name; + /// Path (or path pattern) in archive if uri is an archive. 
std::optional archive_pattern; std::string uri_str; diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index cff6fa5ad21..24e14985758 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -652,14 +652,25 @@ namespace const std::optional> & object_metadata_, ThreadPoolCallbackRunnerUnsafe schedule_, bool for_disk_s3_, - BlobStorageLogWriterPtr blob_storage_log_) - : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, blob_storage_log_, getLogger("copyS3File")) + BlobStorageLogWriterPtr blob_storage_log_, + std::function fallback_method_) + : UploadHelper( + client_ptr_, + dest_bucket_, + dest_key_, + request_settings_, + object_metadata_, + schedule_, + for_disk_s3_, + blob_storage_log_, + getLogger("copyS3File")) , src_bucket(src_bucket_) , src_key(src_key_) , offset(src_offset_) , size(src_size_) , supports_multipart_copy(client_ptr_->supportsMultiPartCopy()) , read_settings(read_settings_) + , fallback_method(std::move(fallback_method_)) { } @@ -682,14 +693,7 @@ namespace size_t size; bool supports_multipart_copy; const ReadSettings read_settings; - - CreateReadBuffer getSourceObjectReadBuffer() - { - return [&] - { - return std::make_unique(client_ptr, src_bucket, src_key, "", request_settings, read_settings); - }; - } + std::function fallback_method; void performSingleOperationCopy() { @@ -744,28 +748,21 @@ namespace if (outcome.GetError().GetExceptionName() == "EntityTooLarge" || outcome.GetError().GetExceptionName() == "InvalidRequest" || outcome.GetError().GetExceptionName() == "InvalidArgument" || + outcome.GetError().GetExceptionName() == "AccessDenied" || (outcome.GetError().GetExceptionName() == "InternalError" && outcome.GetError().GetResponseCode() == Aws::Http::HttpResponseCode::GATEWAY_TIMEOUT && outcome.GetError().GetMessage().contains("use the Rewrite method in the JSON API"))) { - if (!supports_multipart_copy) + if (!supports_multipart_copy || outcome.GetError().GetExceptionName() == "AccessDenied") { - LOG_INFO(log, "Multipart upload using copy is not supported, will try regular upload for Bucket: {}, Key: {}, Object size: {}", - dest_bucket, - dest_key, - size); - copyDataToS3File( - getSourceObjectReadBuffer(), - offset, - size, - client_ptr, + LOG_INFO( + log, + "Multipart upload using copy is not supported, will try regular upload for Bucket: {}, Key: {}, Object size: " + "{}", dest_bucket, dest_key, - request_settings, - blob_storage_log, - object_metadata, - schedule, - for_disk_s3); + size); + fallback_method(); break; } else @@ -859,17 +856,29 @@ void copyDataToS3File( ThreadPoolCallbackRunnerUnsafe schedule, bool for_disk_s3) { - CopyDataToFileHelper helper{create_read_buffer, offset, size, dest_s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3, blob_storage_log}; + CopyDataToFileHelper helper{ + create_read_buffer, + offset, + size, + dest_s3_client, + dest_bucket, + dest_key, + settings, + object_metadata, + schedule, + for_disk_s3, + blob_storage_log}; helper.performCopy(); } void copyS3File( - const std::shared_ptr & s3_client, + const std::shared_ptr & src_s3_client, const String & src_bucket, const String & src_key, size_t src_offset, size_t src_size, + std::shared_ptr dest_s3_client, const String & dest_bucket, const String & dest_key, const S3Settings::RequestSettings & settings, @@ -879,19 +888,50 @@ void copyS3File( ThreadPoolCallbackRunnerUnsafe schedule, bool for_disk_s3) { - if (settings.allow_native_copy) + if 
(!dest_s3_client) + dest_s3_client = src_s3_client; + + std::function fallback_method = [&] { - CopyFileHelper helper{s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, read_settings, object_metadata, schedule, for_disk_s3, blob_storage_log}; - helper.performCopy(); - } - else + auto create_read_buffer + = [&] { return std::make_unique(src_s3_client, src_bucket, src_key, "", settings, read_settings); }; + + copyDataToS3File( + create_read_buffer, + src_offset, + src_size, + dest_s3_client, + dest_bucket, + dest_key, + settings, + blob_storage_log, + object_metadata, + schedule, + for_disk_s3); + }; + + if (!settings.allow_native_copy) { - auto create_read_buffer = [&] - { - return std::make_unique(s3_client, src_bucket, src_key, "", settings, read_settings); - }; - copyDataToS3File(create_read_buffer, src_offset, src_size, s3_client, dest_bucket, dest_key, settings, blob_storage_log, object_metadata, schedule, for_disk_s3); + fallback_method(); + return; } + + CopyFileHelper helper{ + src_s3_client, + src_bucket, + src_key, + src_offset, + src_size, + dest_bucket, + dest_key, + settings, + read_settings, + object_metadata, + schedule, + for_disk_s3, + blob_storage_log, + std::move(fallback_method)}; + helper.performCopy(); } } diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index d5da4d260b1..85b3870ddbf 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -31,11 +31,12 @@ using CreateReadBuffer = std::function()>; /// /// read_settings - is used for throttling in case of native copy is not possible void copyS3File( - const std::shared_ptr & s3_client, + const std::shared_ptr & src_s3_client, const String & src_bucket, const String & src_key, size_t src_offset, size_t src_size, + std::shared_ptr dest_s3_client, const String & dest_bucket, const String & dest_key, const S3Settings::RequestSettings & settings, diff --git a/src/IO/S3/getObjectInfo.cpp b/src/IO/S3/getObjectInfo.cpp index eee3da9fb74..78efda4ae57 100644 --- a/src/IO/S3/getObjectInfo.cpp +++ b/src/IO/S3/getObjectInfo.cpp @@ -53,7 +53,7 @@ namespace const auto & result = outcome.GetResult(); ObjectInfo object_info; object_info.size = static_cast(result.GetContentLength()); - object_info.last_modification_time = result.GetLastModified().Millis() / 1000; + object_info.last_modification_time = result.GetLastModified().Seconds(); if (with_metadata) object_info.metadata = result.GetMetadata(); diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 4583b2bb0ac..78c51fcb29c 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -174,8 +174,11 @@ void AuthSettings::updateFrom(const AuthSettings & from) if (!from.session_token.empty()) session_token = from.session_token; - headers = from.headers; - region = from.region; + if (!from.headers.empty()) + headers = from.headers; + if (!from.region.empty()) + region = from.region; + server_side_encryption_customer_key_base64 = from.server_side_encryption_customer_key_base64; server_side_encryption_kms_config = from.server_side_encryption_kms_config; diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h index 1ceb938e454..ef4e0058ec3 100644 --- a/src/IO/WriteBuffer.h +++ b/src/IO/WriteBuffer.h @@ -41,7 +41,7 @@ public: * If direct write is performed into [position(), buffer().end()) and its length is not enough, * you need to fill it first (i.g with write call), after it the capacity is regained. 
*/ - inline void next() + void next() { if (!offset()) return; @@ -69,7 +69,7 @@ public: /// Calling finalize() in the destructor of derived classes is a bad practice. virtual ~WriteBuffer(); - inline void nextIfAtEnd() + void nextIfAtEnd() { if (!hasPendingData()) next(); @@ -96,7 +96,7 @@ public: } } - inline void write(char x) + void write(char x) { if (finalized) throw Exception{ErrorCodes::LOGICAL_ERROR, "Cannot write to finalized buffer"}; diff --git a/src/IO/ZstdDeflatingAppendableWriteBuffer.h b/src/IO/ZstdDeflatingAppendableWriteBuffer.h index d9c4f32d6da..34cdf03df25 100644 --- a/src/IO/ZstdDeflatingAppendableWriteBuffer.h +++ b/src/IO/ZstdDeflatingAppendableWriteBuffer.h @@ -27,7 +27,7 @@ class ZstdDeflatingAppendableWriteBuffer : public BufferWithOwnMemory; /// Frame end block. If we read non-empty file and see no such flag we should add it. - static inline constexpr ZSTDLastBlock ZSTD_CORRECT_TERMINATION_LAST_BLOCK = {0x01, 0x00, 0x00}; + static constexpr ZSTDLastBlock ZSTD_CORRECT_TERMINATION_LAST_BLOCK = {0x01, 0x00, 0x00}; ZstdDeflatingAppendableWriteBuffer( std::unique_ptr out_, diff --git a/src/IO/examples/read_buffer_from_hdfs.cpp b/src/IO/examples/read_buffer_from_hdfs.cpp index c499542fedb..91139ad94eb 100644 --- a/src/IO/examples/read_buffer_from_hdfs.cpp +++ b/src/IO/examples/read_buffer_from_hdfs.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index 447b72ed7c6..4a4d7cc0fc2 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -547,7 +547,7 @@ public: std::unique_ptr getWriteBuffer(String file_name = "file") { S3Settings::RequestSettings request_settings; - request_settings.updateFromSettings(settings); + request_settings.updateFromSettingsIfChanged(settings); client->resetCounters(); diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index fafe50c170f..a3fe8c2e779 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -126,6 +126,11 @@ bool astContainsSystemTables(ASTPtr ast, ContextPtr context) namespace { +bool isQueryCacheRelatedSetting(const String & setting_name) +{ + return setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache"); +} + class RemoveQueryCacheSettingsMatcher { public: @@ -141,7 +146,7 @@ public: auto is_query_cache_related_setting = [](const auto & change) { - return change.name.starts_with("query_cache_") || change.name.ends_with("_query_cache"); + return isQueryCacheRelatedSetting(change.name); }; std::erase_if(set_clause->changes, is_query_cache_related_setting); @@ -177,6 +182,40 @@ ASTPtr removeQueryCacheSettings(ASTPtr ast) return transformed_ast; } +IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database, const Settings & settings) +{ + ast = removeQueryCacheSettings(ast); + + /// Hash the AST, we must consider aliases (issue #56258) + SipHash hash; + ast->updateTreeHash(hash, /*ignore_aliases=*/ false); + + /// Also hash the database specified via SQL `USE db`, otherwise identifiers in same query (AST) may mean different columns in different + /// tables (issue #64136) + hash.update(current_database); + + /// Finally, hash the (changed) settings as they might affect the query result (e.g. think of settings `additional_table_filters` and `limit`). + /// Note: allChanged() returns the settings in random order. 
Also, update()-s of the composite hash must be done in deterministic order. + /// Therefore, collect and sort the settings first, then hash them. + Settings::Range changed_settings = settings.allChanged(); + std::vector> changed_settings_sorted; /// (name, value) + for (const auto & setting : changed_settings) + { + const String & name = setting.getName(); + const String & value = setting.getValueString(); + if (!isQueryCacheRelatedSetting(name)) /// see removeQueryCacheSettings() why this is a good idea + changed_settings_sorted.push_back({name, value}); + } + std::sort(changed_settings_sorted.begin(), changed_settings_sorted.end(), [](auto & lhs, auto & rhs) { return lhs.first < rhs.first; }); + for (const auto & setting : changed_settings_sorted) + { + hash.update(setting.first); + hash.update(setting.second); + } + + return getSipHash128AsPair(hash); +} + String queryStringFromAST(ASTPtr ast) { WriteBufferFromOwnString buf; @@ -186,17 +225,16 @@ String queryStringFromAST(ASTPtr ast) } -/// Hashing of ASTs must consider aliases (issue #56258) -static constexpr bool ignore_aliases = false; - QueryCache::Key::Key( ASTPtr ast_, + const String & current_database, + const Settings & settings, Block header_, std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, std::chrono::time_point expires_at_, bool is_compressed_) - : ast_hash(removeQueryCacheSettings(ast_)->getTreeHash(ignore_aliases)) + : ast_hash(calculateAstHash(ast_, current_database, settings)) , header(header_) , user_id(user_id_) , current_user_roles(current_user_roles_) @@ -207,8 +245,8 @@ QueryCache::Key::Key( { } -QueryCache::Key::Key(ASTPtr ast_, std::optional user_id_, const std::vector & current_user_roles_) - : QueryCache::Key(ast_, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST or user name +QueryCache::Key::Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional user_id_, const std::vector & current_user_roles_) + : QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles { } diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index 814cad37f82..461197cac32 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -14,6 +14,8 @@ namespace DB { +struct Settings; + /// Does AST contain non-deterministic functions like rand() and now()? bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context); @@ -88,6 +90,8 @@ public: /// Ctor to construct a Key for writing into query cache. Key(ASTPtr ast_, + const String & current_database, + const Settings & settings, Block header_, std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, @@ -95,7 +99,7 @@ public: bool is_compressed); /// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name). 
- Key(ASTPtr ast_, std::optional user_id_, const std::vector & current_user_roles_); + Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional user_id_, const std::vector & current_user_roles_); bool operator==(const Key & other) const; }; diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 3d08219155f..464ce2ec586 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -407,6 +407,10 @@ void executeQueryWithParallelReplicas( std::shared_ptr storage_limits, QueryPlanStepPtr read_from_merge_tree) { + auto logger = getLogger("executeQueryWithParallelReplicas"); + LOG_DEBUG(logger, "Executing read from {}, header {}, query ({}), stage {} with parallel replicas", + storage_id.getNameForLogs(), header.dumpStructure(), query_ast->formatForLogging(), processed_stage); + const auto & settings = context->getSettingsRef(); /// check cluster for parallel replicas diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 1bd9601dd7e..e1d82a8f604 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -160,6 +160,8 @@ namespace CurrentMetrics extern const Metric TablesLoaderForegroundThreadsScheduled; extern const Metric IOWriterThreadsScheduled; extern const Metric AttachedTable; + extern const Metric AttachedView; + extern const Metric AttachedDictionary; extern const Metric AttachedDatabase; extern const Metric PartsActive; } @@ -359,6 +361,8 @@ struct ContextSharedPart : boost::noncopyable /// No lock required for format_schema_path modified only during initialization std::atomic_size_t max_database_num_to_warn = 1000lu; std::atomic_size_t max_table_num_to_warn = 5000lu; + std::atomic_size_t max_view_num_to_warn = 10000lu; + std::atomic_size_t max_dictionary_num_to_warn = 1000lu; std::atomic_size_t max_part_num_to_warn = 100000lu; String format_schema_path; /// Path to a directory that contains schema files used by input formats. String google_protos_path; /// Path to a directory that contains the proto files for the well-known Protobuf types. 
@@ -935,6 +939,10 @@ Strings Context::getWarnings() const common_warnings = shared->warnings; if (CurrentMetrics::get(CurrentMetrics::AttachedTable) > static_cast(shared->max_table_num_to_warn)) common_warnings.emplace_back(fmt::format("The number of attached tables is more than {}", shared->max_table_num_to_warn)); + if (CurrentMetrics::get(CurrentMetrics::AttachedView) > static_cast(shared->max_view_num_to_warn)) + common_warnings.emplace_back(fmt::format("The number of attached views is more than {}", shared->max_view_num_to_warn)); + if (CurrentMetrics::get(CurrentMetrics::AttachedDictionary) > static_cast(shared->max_dictionary_num_to_warn)) + common_warnings.emplace_back(fmt::format("The number of attached dictionaries is more than {}", shared->max_dictionary_num_to_warn)); if (CurrentMetrics::get(CurrentMetrics::AttachedDatabase) > static_cast(shared->max_database_num_to_warn)) common_warnings.emplace_back(fmt::format("The number of attached databases is more than {}", shared->max_database_num_to_warn)); if (CurrentMetrics::get(CurrentMetrics::PartsActive) > static_cast(shared->max_part_num_to_warn)) @@ -3711,6 +3719,18 @@ void Context::setMaxTableNumToWarn(size_t max_table_to_warn) shared->max_table_num_to_warn= max_table_to_warn; } +void Context::setMaxViewNumToWarn(size_t max_view_to_warn) +{ + SharedLockGuard lock(shared->mutex); + shared->max_view_num_to_warn= max_view_to_warn; +} + +void Context::setMaxDictionaryNumToWarn(size_t max_dictionary_to_warn) +{ + SharedLockGuard lock(shared->mutex); + shared->max_dictionary_num_to_warn= max_dictionary_to_warn; +} + void Context::setMaxDatabaseNumToWarn(size_t max_database_to_warn) { SharedLockGuard lock(shared->mutex); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 7f663773e52..814534f7035 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -861,6 +861,8 @@ public: const HTTPHeaderFilter & getHTTPHeaderFilter() const; void setMaxTableNumToWarn(size_t max_table_to_warn); + void setMaxViewNumToWarn(size_t max_view_to_warn); + void setMaxDictionaryNumToWarn(size_t max_dictionary_to_warn); void setMaxDatabaseNumToWarn(size_t max_database_to_warn); void setMaxPartNumToWarn(size_t max_part_to_warn); /// The port that the server listens for executing SQL queries. 
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h index 5a8a5bfb184..0b0460b26c8 100644 --- a/src/Interpreters/DDLTask.h +++ b/src/Interpreters/DDLTask.h @@ -133,10 +133,10 @@ struct DDLTaskBase virtual void createSyncedNodeIfNeed(const ZooKeeperPtr & /*zookeeper*/) {} - inline String getActiveNodePath() const { return fs::path(entry_path) / "active" / host_id_str; } - inline String getFinishedNodePath() const { return fs::path(entry_path) / "finished" / host_id_str; } - inline String getShardNodePath() const { return fs::path(entry_path) / "shards" / getShardID(); } - inline String getSyncedNodePath() const { return fs::path(entry_path) / "synced" / host_id_str; } + String getActiveNodePath() const { return fs::path(entry_path) / "active" / host_id_str; } + String getFinishedNodePath() const { return fs::path(entry_path) / "finished" / host_id_str; } + String getShardNodePath() const { return fs::path(entry_path) / "shards" / getShardID(); } + String getSyncedNodePath() const { return fs::path(entry_path) / "synced" / host_id_str; } static String getLogEntryName(UInt32 log_entry_number); static UInt32 getLogEntryNumber(const String & log_entry_name); diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 5caa034e0e9..37125d9900c 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -284,7 +284,7 @@ private: static constexpr UInt64 bits_for_first_level = 4; using UUIDToStorageMap = std::array; - static inline size_t getFirstLevelIdx(const UUID & uuid) + static size_t getFirstLevelIdx(const UUID & uuid) { return UUIDHelpers::getHighBytes(uuid) >> (64 - bits_for_first_level); } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 4fdd804452d..b30fc8bc092 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -977,6 +977,13 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (as_create.is_ordinary_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a View", qualified_name); + if (as_create.is_materialized_view && as_create.to_table_id) + throw Exception( + ErrorCodes::INCORRECT_QUERY, + "Cannot CREATE a table AS {}, it is a Materialized View without storage. 
Use \"AS `{}`\" instead", + qualified_name, + as_create.to_table_id.getQualifiedName()); + if (as_create.is_live_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Live View", qualified_name); @@ -1493,7 +1500,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, validateVirtualColumns(*res); - if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) + if (!res->supportsDynamicSubcolumnsDeprecated() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns()) && mode <= LoadingStrictnessLevel::CREATE) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create table with column of type Object, " diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index ee774994145..9cfb8e486cb 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -25,6 +25,7 @@ namespace ErrorCodes extern const int TABLE_IS_READ_ONLY; extern const int SUPPORT_IS_DISABLED; extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; } @@ -107,7 +108,19 @@ BlockIO InterpreterDeleteQuery::execute() } else { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "DELETE query is not supported for table {}", table->getStorageID().getFullTableName()); + /// Currently just better exception for the case of a table with projection, + /// can act differently according to the setting. + if (table->hasProjection()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DELETE query is not supported for table {} as it has projections. " + "User should drop all the projections manually before running the query", + table->getStorageID().getFullTableName()); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "DELETE query is not supported for table {}", + table->getStorageID().getFullTableName()); } } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 12677c422b8..128854e87ba 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -552,7 +552,11 @@ BlockIO InterpreterInsertQuery::execute() { /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. 
- if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && !isVariant(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name)) + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) + && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) + && !isVariant(query_columns[col_idx].type) + && !isDynamic(query_columns[col_idx].type) + && output_columns.has(query_columns[col_idx].name)) query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); } } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 498030a1552..d3526941b33 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -51,11 +51,12 @@ #include #include #include -#include #include -#include +#include +#include +#include +#include #include -#include #include #include #include @@ -500,17 +501,17 @@ BlockIO InterpreterSystemQuery::execute() StorageFile::getSchemaCache(getContext()).clear(); #if USE_AWS_S3 if (caches_to_drop.contains("S3")) - StorageS3::getSchemaCache(getContext()).clear(); + StorageObjectStorage::getSchemaCache(getContext(), StorageS3Configuration::type_name).clear(); #endif #if USE_HDFS if (caches_to_drop.contains("HDFS")) - StorageHDFS::getSchemaCache(getContext()).clear(); + StorageObjectStorage::getSchemaCache(getContext(), StorageHDFSConfiguration::type_name).clear(); #endif if (caches_to_drop.contains("URL")) StorageURL::getSchemaCache(getContext()).clear(); #if USE_AZURE_BLOB_STORAGE if (caches_to_drop.contains("AZURE")) - StorageAzureBlob::getSchemaCache(getContext()).clear(); + StorageObjectStorage::getSchemaCache(getContext(), StorageAzureConfiguration::type_name).clear(); #endif break; } diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index 046d0b4fc10..21c773ee1d7 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -119,9 +119,9 @@ public: return result; } - inline size_t getAllocatedSize() const { return allocated_size; } + size_t getAllocatedSize() const { return allocated_size; } - inline size_t getPageSize() const { return page_size; } + size_t getPageSize() const { return page_size; } ~PageArena() { @@ -177,10 +177,10 @@ private: { } - inline void * base() const { return pages_base; } - inline size_t pagesSize() const { return pages_size; } - inline size_t pageSize() const { return page_size; } - inline size_t blockSize() const { return pages_size * page_size; } + void * base() const { return pages_base; } + size_t pagesSize() const { return pages_size; } + size_t pageSize() const { return page_size; } + size_t blockSize() const { return pages_size * page_size; } private: void * pages_base; @@ -298,7 +298,7 @@ public: return true; } - inline size_t allocatedSize() const + size_t allocatedSize() const { size_t data_size = rw_page_arena.getAllocatedSize() + ro_page_arena.getAllocatedSize(); size_t code_size = ex_page_arena.getAllocatedSize(); diff --git a/src/Interpreters/JIT/CHJIT.h b/src/Interpreters/JIT/CHJIT.h index fc883802426..89d446fd3b3 100644 --- a/src/Interpreters/JIT/CHJIT.h +++ b/src/Interpreters/JIT/CHJIT.h @@ -85,7 +85,7 @@ public: /** Total compiled code size for module that are currently valid. 
*/ - inline size_t getCompiledCodeSize() const { return compiled_code_size.load(std::memory_order_relaxed); } + size_t getCompiledCodeSize() const { return compiled_code_size.load(std::memory_order_relaxed); } private: diff --git a/src/Interpreters/JIT/CompileDAG.h b/src/Interpreters/JIT/CompileDAG.h index 13ec763b6fc..8db4ac5e110 100644 --- a/src/Interpreters/JIT/CompileDAG.h +++ b/src/Interpreters/JIT/CompileDAG.h @@ -65,17 +65,17 @@ public: nodes.emplace_back(std::move(node)); } - inline size_t getNodesCount() const { return nodes.size(); } - inline size_t getInputNodesCount() const { return input_nodes_count; } + size_t getNodesCount() const { return nodes.size(); } + size_t getInputNodesCount() const { return input_nodes_count; } - inline Node & operator[](size_t index) { return nodes[index]; } - inline const Node & operator[](size_t index) const { return nodes[index]; } + Node & operator[](size_t index) { return nodes[index]; } + const Node & operator[](size_t index) const { return nodes[index]; } - inline Node & front() { return nodes.front(); } - inline const Node & front() const { return nodes.front(); } + Node & front() { return nodes.front(); } + const Node & front() const { return nodes.front(); } - inline Node & back() { return nodes.back(); } - inline const Node & back() const { return nodes.back(); } + Node & back() { return nodes.back(); } + const Node & back() const { return nodes.back(); } private: std::vector nodes; diff --git a/src/Interpreters/JoinUtils.h b/src/Interpreters/JoinUtils.h index ff48f34d82c..f15ee2c2fb2 100644 --- a/src/Interpreters/JoinUtils.h +++ b/src/Interpreters/JoinUtils.h @@ -49,7 +49,7 @@ public: return nullptr; } - inline bool isRowFiltered(size_t row) const + bool isRowFiltered(size_t row) const { return !assert_cast(*column).getData()[row]; } diff --git a/src/Interpreters/TreeCNFConverter.h b/src/Interpreters/TreeCNFConverter.h index 8258412f1a6..ec4b029eee9 100644 --- a/src/Interpreters/TreeCNFConverter.h +++ b/src/Interpreters/TreeCNFConverter.h @@ -164,6 +164,12 @@ public: void pushNotIn(CNFQuery::AtomicFormula & atom); +/// Reduces CNF groups by removing mutually exclusive atoms +/// found across groups, in case other atoms are identical. +/// Might require multiple passes to complete reduction. +/// +/// Example: +/// (x OR y) AND (x OR !y) -> x template TAndGroup reduceOnceCNFStatements(const TAndGroup & groups) { @@ -175,10 +181,19 @@ TAndGroup reduceOnceCNFStatements(const TAndGroup & groups) bool inserted = false; for (const auto & atom : group) { - copy.erase(atom); using AtomType = std::decay_t; AtomType negative_atom(atom); negative_atom.negative = !atom.negative; + + // Skipping erase-insert for mutually exclusive atoms within + // a single group, since it won't insert the negative atom, which + // would break the logic of this rule + if (copy.contains(negative_atom)) + { + continue; + } + + copy.erase(atom); copy.insert(negative_atom); if (groups.contains(copy)) @@ -209,6 +224,10 @@ bool isCNFGroupSubset(const TOrGroup & left, const TOrGroup & right) return true; } +/// Removes CNF groups if subset group is found in CNF.
+/// +/// Example: +/// (x OR y) AND (x) -> x template TAndGroup filterCNFSubsets(const TAndGroup & groups) { diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 50c28fbc8b2..a3c5a7ed3ed 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1188,6 +1188,33 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } + /// Check for dynamic subcolums in unknown required columns. + if (!unknown_required_source_columns.empty()) + { + for (const NameAndTypePair & pair : source_columns_ordinary) + { + if (!pair.type->hasDynamicSubcolumns()) + continue; + + for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();) + { + auto [column_name, dynamic_subcolumn_name] = Nested::splitName(*it); + + if (column_name == pair.name) + { + if (auto dynamic_subcolumn_type = pair.type->tryGetSubcolumnType(dynamic_subcolumn_name)) + { + source_columns.emplace_back(*it, dynamic_subcolumn_type); + it = unknown_required_source_columns.erase(it); + continue; + } + } + + ++it; + } + } + } + if (!unknown_required_source_columns.empty()) { constexpr auto format_string = "Missing columns: {} while processing query: '{}', required columns:{}{}"; diff --git a/src/Interpreters/WhereConstraintsOptimizer.cpp b/src/Interpreters/WhereConstraintsOptimizer.cpp index 979a4f4dbf5..456cf76b987 100644 --- a/src/Interpreters/WhereConstraintsOptimizer.cpp +++ b/src/Interpreters/WhereConstraintsOptimizer.cpp @@ -91,6 +91,22 @@ bool checkIfGroupAlwaysTrueGraph(const CNFQuery::OrGroup & group, const Comparis return false; } +bool checkIfGroupAlwaysTrueAtoms(const CNFQuery::OrGroup & group) +{ + /// Filters out groups containing mutually exclusive atoms, + /// since these groups are always True + + for (const auto & atom : group) + { + auto negated(atom); + negated.negative = !atom.negative; + if (group.contains(negated)) + { + return true; + } + } + return false; +} bool checkIfAtomAlwaysFalseFullMatch(const CNFQuery::AtomicFormula & atom, const ConstraintsDescription & constraints_description) { @@ -158,7 +174,8 @@ void WhereConstraintsOptimizer::perform() .filterAlwaysTrueGroups([&compare_graph, this](const auto & group) { /// remove always true groups from CNF - return !checkIfGroupAlwaysTrueFullMatch(group, metadata_snapshot->getConstraints()) && !checkIfGroupAlwaysTrueGraph(group, compare_graph); + return !checkIfGroupAlwaysTrueFullMatch(group, metadata_snapshot->getConstraints()) + && !checkIfGroupAlwaysTrueGraph(group, compare_graph) && !checkIfGroupAlwaysTrueAtoms(group); }) .filterAlwaysFalseAtoms([&compare_graph, this](const auto & atom) { diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 25085ff4823..9363e3d83eb 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -504,7 +505,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID else if (const DataTypeVariant * type_variant = typeid_cast(&type)) { /// If we have type hint and Variant contains such type, no need to convert field. - if (from_type_hint && type_variant->tryGetVariantDiscriminator(*from_type_hint)) + if (from_type_hint && type_variant->tryGetVariantDiscriminator(from_type_hint->getName())) return src; /// Create temporary column and check if we can insert this field to the variant. 
@@ -513,6 +514,11 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (col->tryInsert(src)) return src; } + else if (isDynamic(type)) + { + /// We can insert any field to Dynamic column. + return src; + } /// Conversion from string by parsing. if (src.getType() == Field::Types::String) diff --git a/src/Interpreters/examples/hash_map_string_3.cpp b/src/Interpreters/examples/hash_map_string_3.cpp index 57e36bed545..44ee3542bd9 100644 --- a/src/Interpreters/examples/hash_map_string_3.cpp +++ b/src/Interpreters/examples/hash_map_string_3.cpp @@ -96,7 +96,7 @@ inline bool operator==(StringRef_CompareAlwaysTrue, StringRef_CompareAlwaysTrue) struct FastHash64 { - static inline uint64_t mix(uint64_t h) + static uint64_t mix(uint64_t h) { h ^= h >> 23; h *= 0x2127599bf4325c37ULL; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index e33394d68a4..9c5436517ab 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1093,6 +1093,15 @@ static std::tuple executeQueryImpl( && (ast->as() || ast->as()); QueryCache::Usage query_cache_usage = QueryCache::Usage::None; + /// If the query runs with "use_query_cache = 1", we first probe if the query cache already contains the query result (if yes: + /// return result from cache). If doesn't, we execute the query normally and write the result into the query cache. Both steps use a + /// hash of the AST, the current database and the settings as cache key. Unfortunately, the settings are in some places internally + /// modified between steps 1 and 2 (= during query execution) - this is silly but hard to forbid. As a result, the hashes no longer + /// match and the cache is rendered ineffective. Therefore make a copy of the settings and use it for steps 1 and 2. 
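+    /// (Illustration with made-up values: step 1 might hash the key with max_threads = 8 and miss; if query execution then
+    /// lowers max_threads internally, step 2 would hash the key with max_threads = 4 and store the result under a key that
+    /// the next identical query will never probe. Keying both steps off the copy below avoids this mismatch.)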
+ std::optional settings_copy; + if (can_use_query_cache) + settings_copy = settings; + if (!async_insert) { /// If it is a non-internal SELECT, and passive (read) use of the query cache is enabled, and the cache knows the query, then set @@ -1101,7 +1110,7 @@ static std::tuple executeQueryImpl( { if (can_use_query_cache && settings.enable_reads_from_query_cache) { - QueryCache::Key key(ast, context->getUserID(), context->getCurrentRoles()); + QueryCache::Key key(ast, context->getCurrentDatabase(), *settings_copy, context->getUserID(), context->getCurrentRoles()); QueryCache::Reader reader = query_cache->createReader(key); if (reader.hasCacheEntryForKey()) { @@ -1226,7 +1235,7 @@ static std::tuple executeQueryImpl( && (!ast_contains_system_tables || system_table_handling == QueryCacheSystemTableHandling::Save)) { QueryCache::Key key( - ast, res.pipeline.getHeader(), + ast, context->getCurrentDatabase(), *settings_copy, res.pipeline.getHeader(), context->getUserID(), context->getCurrentRoles(), settings.query_cache_share_between_users, std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl), diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index 27c364073ae..3529863a623 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -40,7 +40,7 @@ void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidatio if (!settings.allow_experimental_object_type) { - if (data_type.hasDynamicSubcolumns()) + if (data_type.hasDynamicSubcolumnsDeprecated()) { throw Exception( ErrorCodes::ILLEGAL_COLUMN, @@ -107,6 +107,18 @@ void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidatio } } } + + if (!settings.allow_experimental_dynamic_type) + { + if (data_type.hasDynamicSubcolumns()) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot create column with type '{}' because experimental Dynamic type is not allowed. 
" + "Set setting allow_experimental_dynamic_type = 1 in order to allow it", + data_type.getName()); + } + } }; validate_callback(*type_to_check); diff --git a/src/Interpreters/parseColumnsListForTableFunction.h b/src/Interpreters/parseColumnsListForTableFunction.h index ffb59bfa457..e2d2bc97ff7 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.h +++ b/src/Interpreters/parseColumnsListForTableFunction.h @@ -21,6 +21,7 @@ struct DataTypeValidationSettings , allow_experimental_variant_type(settings.allow_experimental_variant_type) , allow_suspicious_variant_types(settings.allow_suspicious_variant_types) , validate_nested_types(settings.validate_experimental_and_suspicious_types_inside_nested_types) + , allow_experimental_dynamic_type(settings.allow_experimental_dynamic_type) { } @@ -30,6 +31,7 @@ struct DataTypeValidationSettings bool allow_experimental_variant_type = true; bool allow_suspicious_variant_types = true; bool validate_nested_types = true; + bool allow_experimental_dynamic_type = true; }; void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings & settings); diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 4b17469f4d7..0bd4b94d999 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -263,7 +263,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log } } #ifndef WITHOUT_TEXT_LOG - if (config.has("text_log")) + if (allowTextLog() && config.has("text_log")) { String text_log_level_str = config.getString("text_log.level", "trace"); int text_log_level = Poco::Logger::parseLevel(text_log_level_str); diff --git a/src/Loggers/Loggers.h b/src/Loggers/Loggers.h index 9eff731a4c5..9923d66ebcb 100644 --- a/src/Loggers/Loggers.h +++ b/src/Loggers/Loggers.h @@ -23,6 +23,10 @@ public: /// Close log files. On next log write files will be reopened. 
void closeLogs(Poco::Logger & logger); + virtual ~Loggers() = default; + +protected: + virtual bool allowTextLog() const { return true; } private: Poco::AutoPtr log_file; diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp index fee33781c27..dc51a13e01f 100644 --- a/src/Loggers/OwnSplitChannel.cpp +++ b/src/Loggers/OwnSplitChannel.cpp @@ -107,6 +107,10 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) [[maybe_unused]] bool push_result = logs_queue->emplace(std::move(columns)); } + auto text_log_locked = text_log.lock(); + if (!text_log_locked) + return; + /// Also log to system.text_log table, if message is not too noisy auto text_log_max_priority_loaded = text_log_max_priority.load(std::memory_order_relaxed); if (text_log_max_priority_loaded && msg.getPriority() <= text_log_max_priority_loaded) @@ -146,10 +150,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) #undef SET_VALUE_IF_EXISTS - std::shared_ptr> text_log_locked{}; - text_log_locked = text_log.lock(); - if (text_log_locked) - text_log_locked->push(std::move(elem)); + text_log_locked->push(std::move(elem)); } #endif } diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h index b75554eefc4..7ca27cf6584 100644 --- a/src/Loggers/OwnSplitChannel.h +++ b/src/Loggers/OwnSplitChannel.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index 05c9a2cd306..b5bc9f89990 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -14,18 +15,60 @@ namespace DB namespace { +/// Parser of Dynamic type arguments: Dynamic(max_types=N) +class DynamicArgumentsParser : public IParserBase +{ +private: + const char * getName() const override { return "Dynamic data type optional argument"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + ASTPtr identifier; + ParserIdentifier identifier_parser; + if (!identifier_parser.parse(pos, identifier, expected)) + return false; + + if (pos->type != TokenType::Equals) + { + expected.add(pos, "equals operator"); + return false; + } + + ++pos; + + ASTPtr number; + ParserNumber number_parser; + if (!number_parser.parse(pos, number, expected)) + return false; + + node = makeASTFunction("equals", identifier, number); + return true; + } +}; + /// Wrapper to allow mixed lists of nested and normal types. 
/// Parameters are either: /// - Nested table elements; /// - Enum element in form of 'a' = 1; /// - literal; -/// - another data type (or identifier) +/// - Dynamic type arguments; +/// - another data type (or identifier); class ParserDataTypeArgument : public IParserBase { +public: + explicit ParserDataTypeArgument(std::string_view type_name_) : type_name(type_name_) + { + } + private: const char * getName() const override { return "data type argument"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override { + if (type_name == "Dynamic") + { + DynamicArgumentsParser parser; + return parser.parse(pos, node, expected); + } + ParserNestedTable nested_parser; ParserDataType data_type_parser; ParserAllCollectionsOfLiterals literal_parser(false); @@ -40,6 +83,8 @@ private: || literal_parser.parse(pos, node, expected) || data_type_parser.parse(pos, node, expected); } + + std::string_view type_name; }; } @@ -148,7 +193,7 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ++pos; /// Parse optional parameters - ParserList args_parser(std::make_unique(), std::make_unique(TokenType::Comma)); + ParserList args_parser(std::make_unique(type_name), std::make_unique(TokenType::Comma)); ASTPtr expr_list_args; if (!args_parser.parse(pos, expr_list_args, expected)) diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 0b5767407e7..2a95234057c 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -444,7 +444,6 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, for (auto & interpolate_node : interpolate_list_node.getNodes()) { auto & interpolate_node_typed = interpolate_node->as(); - interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getExpression()); interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); } diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index ab16aaa56ad..58f78e5af42 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -80,7 +80,7 @@ public: bool allowVariableNumberOfColumns() const override { return format_settings.custom.allow_variable_number_of_columns; } bool checkForSuffixImpl(bool check_eof); - inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf, true); } + void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf, true); } EscapingRule getEscapingRule() const override { return format_settings.custom.escaping_rule; } diff --git a/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h b/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h new file mode 100644 index 00000000000..2c78949e8e1 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + +namespace parquet +{ + +class PageReader; +class ColumnChunkMetaData; +class DataPageV1; +class DataPageV2; + +} + +namespace DB +{ + +class ParquetColumnReader +{ +public: + virtual ColumnWithTypeAndName readBatch(UInt64 rows_num, const String & name) = 0; + + virtual ~ParquetColumnReader() = default; +}; + +using ParquetColReaderPtr = std::unique_ptr; +using ParquetColReaders = std::vector; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h 
b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h new file mode 100644 index 00000000000..57df6f59f72 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h @@ -0,0 +1,182 @@ +#pragma once + +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int PARQUET_EXCEPTION; +} + +template struct ToArrowDecimal; + +template <> struct ToArrowDecimal>> +{ + using ArrowDecimal = arrow::Decimal128; +}; + +template <> struct ToArrowDecimal>> +{ + using ArrowDecimal = arrow::Decimal256; +}; + + +class ParquetDataBuffer +{ +private: + +public: + ParquetDataBuffer(const uint8_t * data_, UInt64 available_, UInt8 datetime64_scale_ = DataTypeDateTime64::default_scale) + : data(reinterpret_cast(data_)), available(available_), datetime64_scale(datetime64_scale_) {} + + template + void ALWAYS_INLINE readValue(TValue & dst) + { + readBytes(&dst, sizeof(TValue)); + } + + void ALWAYS_INLINE readBytes(void * dst, size_t bytes) + { + checkAvaible(bytes); + std::copy(data, data + bytes, reinterpret_cast(dst)); + consume(bytes); + } + + void ALWAYS_INLINE readDateTime64FromInt96(DateTime64 & dst) + { + static const int max_scale_num = 9; + static const UInt64 pow10[max_scale_num + 1] + = {1000000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1}; + static const UInt64 spd = 60 * 60 * 24; + static const UInt64 scaled_day[max_scale_num + 1] + = {spd, + 10 * spd, + 100 * spd, + 1000 * spd, + 10000 * spd, + 100000 * spd, + 1000000 * spd, + 10000000 * spd, + 100000000 * spd, + 1000000000 * spd}; + + parquet::Int96 tmp; + readValue(tmp); + auto decoded = parquet::DecodeInt96Timestamp(tmp); + + uint64_t scaled_nano = decoded.nanoseconds / pow10[datetime64_scale]; + dst = static_cast(decoded.days_since_epoch * scaled_day[datetime64_scale] + scaled_nano); + } + + /** + * This method should only be used to read string whose elements size is small. 
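+     * (The value layout here is Parquet PLAIN encoding for BYTE_ARRAY: a 4-byte little-endian length followed by the bytes themselves.)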
+ * Because memcpySmallAllowReadWriteOverflow15 instead of memcpy is used according to ColumnString::indexImpl + */ + void ALWAYS_INLINE readString(ColumnString & column, size_t cursor) + { + // refer to: PlainByteArrayDecoder::DecodeArrowDense in encoding.cc + // deserializeBinarySSE2 in SerializationString.cpp + checkAvaible(4); + auto value_len = ::arrow::util::SafeLoadAs(getArrowData()); + if (unlikely(value_len < 0 || value_len > INT32_MAX - 4)) + { + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Invalid or corrupted value_len '{}'", value_len); + } + consume(4); + checkAvaible(value_len); + + auto chars_cursor = column.getChars().size(); + column.getChars().resize(chars_cursor + value_len + 1); + + memcpySmallAllowReadWriteOverflow15(&column.getChars()[chars_cursor], data, value_len); + column.getChars().back() = 0; + + column.getOffsets().data()[cursor] = column.getChars().size(); + consume(value_len); + } + + template + void ALWAYS_INLINE readOverBigDecimal(TDecimal * out, Int32 elem_bytes_num) + { + using TArrowDecimal = typename ToArrowDecimal::ArrowDecimal; + + checkAvaible(elem_bytes_num); + + // refer to: RawBytesToDecimalBytes in reader_internal.cc, Decimal128::FromBigEndian in decimal.cc + auto status = TArrowDecimal::FromBigEndian(getArrowData(), elem_bytes_num); + assert(status.ok()); + status.ValueUnsafe().ToBytes(reinterpret_cast(out)); + consume(elem_bytes_num); + } + +private: + const Int8 * data; + UInt64 available; + const UInt8 datetime64_scale; + + void ALWAYS_INLINE checkAvaible(UInt64 num) + { + if (unlikely(available < num)) + { + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Consuming {} bytes while {} available", num, available); + } + } + + const uint8_t * ALWAYS_INLINE getArrowData() { return reinterpret_cast(data); } + + void ALWAYS_INLINE consume(UInt64 num) + { + data += num; + available -= num; + } +}; + + +class LazyNullMap +{ +public: + explicit LazyNullMap(UInt64 size_) : size(size_), col_nullable(nullptr) {} + + template + requires std::is_integral_v + void setNull(T cursor) + { + initialize(); + null_map[cursor] = 1; + } + + template + requires std::is_integral_v + void setNull(T cursor, UInt32 count) + { + initialize(); + memset(null_map + cursor, 1, count); + } + + ColumnPtr getNullableCol() { return col_nullable; } + +private: + UInt64 size; + UInt8 * null_map; + ColumnPtr col_nullable; + + void initialize() + { + if (likely(col_nullable)) + { + return; + } + auto col = ColumnVector::create(size); + null_map = col->getData().data(); + col_nullable = std::move(col); + memset(null_map, 0, size); + } +}; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp new file mode 100644 index 00000000000..b8e4db8700c --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -0,0 +1,585 @@ +#include "ParquetDataValuesReader.h" + +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int PARQUET_EXCEPTION; +} + +RleValuesReader::RleValuesReader( + std::unique_ptr bit_reader_, Int32 bit_width_) + : bit_reader(std::move(bit_reader_)), bit_width(bit_width_) +{ + if (unlikely(bit_width >= 64)) + { + // e.g. 
in GetValue_ in bit_stream_utils.h, uint64 type is used to read bit values + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "unsupported bit width {}", bit_width); + } +} + +void RleValuesReader::nextGroup() +{ + // refer to: + // RleDecoder::NextCounts in rle_encoding.h and VectorizedRleValuesReader::readNextGroup in Spark + UInt32 indicator_value = 0; + [[maybe_unused]] auto read_res = bit_reader->GetVlqInt(&indicator_value); + assert(read_res); + + cur_group_is_packed = indicator_value & 1; + cur_group_size = indicator_value >> 1; + + if (cur_group_is_packed) + { + cur_group_size *= 8; + cur_packed_bit_values.resize(cur_group_size); + bit_reader->GetBatch(bit_width, cur_packed_bit_values.data(), cur_group_size); + } + else + { + cur_value = 0; + read_res = bit_reader->GetAligned((bit_width + 7) / 8, &cur_value); + assert(read_res); + } + cur_group_cursor = 0; + +} + +template +void RleValuesReader::visitValues( + UInt32 num_values, IndividualVisitor && individual_visitor, RepeatedVisitor && repeated_visitor) +{ + // refer to: VisitNullBitmapInline in visitor_inline.h + while (num_values) + { + nextGroupIfNecessary(); + auto cur_count = std::min(num_values, curGroupLeft()); + + if (cur_group_is_packed) + { + for (auto i = cur_group_cursor; i < cur_group_cursor + cur_count; i++) + { + individual_visitor(cur_packed_bit_values[i]); + } + } + else + { + repeated_visitor(cur_count, cur_value); + } + cur_group_cursor += cur_count; + num_values -= cur_count; + } +} + +template +void RleValuesReader::visitNullableValues( + size_t cursor, + UInt32 num_values, + Int32 max_def_level, + LazyNullMap & null_map, + IndividualVisitor && individual_visitor, + RepeatedVisitor && repeated_visitor) +{ + while (num_values) + { + nextGroupIfNecessary(); + auto cur_count = std::min(num_values, curGroupLeft()); + + if (cur_group_is_packed) + { + for (auto i = cur_group_cursor; i < cur_group_cursor + cur_count; i++) + { + if (cur_packed_bit_values[i] == max_def_level) + { + individual_visitor(cursor); + } + else + { + null_map.setNull(cursor); + } + cursor++; + } + } + else + { + if (cur_value == max_def_level) + { + repeated_visitor(cursor, cur_count); + } + else + { + null_map.setNull(cursor, cur_count); + } + cursor += cur_count; + } + cur_group_cursor += cur_count; + num_values -= cur_count; + } +} + +template +void RleValuesReader::visitNullableBySteps( + size_t cursor, + UInt32 num_values, + Int32 max_def_level, + IndividualNullVisitor && individual_null_visitor, + SteppedValidVisitor && stepped_valid_visitor, + RepeatedVisitor && repeated_visitor) +{ + // refer to: + // RleDecoder::GetBatch in rle_encoding.h and TypedColumnReaderImpl::ReadBatchSpaced in column_reader.cc + // VectorizedRleValuesReader::readBatchInternal in Spark + while (num_values > 0) + { + nextGroupIfNecessary(); + auto cur_count = std::min(num_values, curGroupLeft()); + + if (cur_group_is_packed) + { + valid_index_steps.resize(cur_count + 1); + valid_index_steps[0] = 0; + auto step_idx = 0; + auto null_map_cursor = cursor; + + for (auto i = cur_group_cursor; i < cur_group_cursor + cur_count; i++) + { + if (cur_packed_bit_values[i] == max_def_level) + { + valid_index_steps[++step_idx] = 1; + } + else + { + individual_null_visitor(null_map_cursor); + if (unlikely(valid_index_steps[step_idx] == UINT8_MAX)) + { + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "unsupported packed values number"); + } + valid_index_steps[step_idx]++; + } + null_map_cursor++; + } + valid_index_steps.resize(step_idx + 1); + stepped_valid_visitor(cursor, 
valid_index_steps); + } + else + { + repeated_visitor(cur_value == max_def_level, cursor, cur_count); + } + + cursor += cur_count; + cur_group_cursor += cur_count; + num_values -= cur_count; + } +} + +template +void RleValuesReader::setValues(TValue * res_values, UInt32 num_values, ValueGetter && val_getter) +{ + visitValues( + num_values, + /* individual_visitor */ [&](Int32 val) + { + *(res_values++) = val_getter(val); + }, + /* repeated_visitor */ [&](UInt32 count, Int32 val) + { + std::fill(res_values, res_values + count, val_getter(val)); + res_values += count; + } + ); +} + +template +void RleValuesReader::setValueBySteps( + TValue * res_values, + const std::vector & col_data_steps, + ValueGetter && val_getter) +{ + auto step_iterator = col_data_steps.begin(); + res_values += *(step_iterator++); + + visitValues( + static_cast(col_data_steps.size() - 1), + /* individual_visitor */ [&](Int32 val) + { + *res_values = val_getter(val); + res_values += *(step_iterator++); + }, + /* repeated_visitor */ [&](UInt32 count, Int32 val) + { + auto getted_val = val_getter(val); + for (UInt32 i = 0; i < count; i++) + { + *res_values = getted_val; + res_values += *(step_iterator++); + } + } + ); +} + + +namespace +{ + +template +TValue * getResizedPrimitiveData(TColumn & column, size_t size) +{ + auto old_size = column.size(); + column.getData().resize(size); + memset(column.getData().data() + old_size, 0, sizeof(TValue) * (size - old_size)); + return column.getData().data(); +} + +} // anoynomous namespace + + +template <> +void ParquetPlainValuesReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto & column = *assert_cast(col_ptr.get()); + auto cursor = column.size(); + + column.getOffsets().resize(cursor + num_values); + auto * offset_data = column.getOffsets().data(); + auto & chars = column.getChars(); + + def_level_reader->visitValues( + num_values, + /* individual_visitor */ [&](Int32 val) + { + if (val == max_def_level) + { + plain_data_buffer.readString(column, cursor); + } + else + { + chars.push_back(0); + offset_data[cursor] = chars.size(); + null_map.setNull(cursor); + } + cursor++; + }, + /* repeated_visitor */ [&](UInt32 count, Int32 val) + { + if (val == max_def_level) + { + for (UInt32 i = 0; i < count; i++) + { + plain_data_buffer.readString(column, cursor); + cursor++; + } + } + else + { + null_map.setNull(cursor, count); + + auto chars_size_bak = chars.size(); + chars.resize(chars_size_bak + count); + memset(&chars[chars_size_bak], 0, count); + + auto idx = cursor; + cursor += count; + for (auto val_offset = chars_size_bak; idx < cursor; idx++) + { + offset_data[idx] = ++val_offset; + } + } + } + ); +} + + +template <> +void ParquetPlainValuesReader, ParquetReaderTypes::TimestampInt96>::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData( + *assert_cast *>(col_ptr.get()), cursor + num_values); + + def_level_reader->visitNullableValues( + cursor, + num_values, + max_def_level, + null_map, + /* individual_visitor */ [&](size_t nest_cursor) + { + plain_data_buffer.readDateTime64FromInt96(column_data[nest_cursor]); + }, + /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) + { + auto * col_data_pos = column_data + nest_cursor; + for (UInt32 i = 0; i < count; i++) + { + plain_data_buffer.readDateTime64FromInt96(col_data_pos[i]); + } + } + ); +} + +template +void ParquetPlainValuesReader::readBatch( + 
MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData(*assert_cast(col_ptr.get()), cursor + num_values); + using TValue = std::decay_t; + + def_level_reader->visitNullableValues( + cursor, + num_values, + max_def_level, + null_map, + /* individual_visitor */ [&](size_t nest_cursor) + { + plain_data_buffer.readValue(column_data[nest_cursor]); + }, + /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) + { + plain_data_buffer.readBytes(column_data + nest_cursor, count * sizeof(TValue)); + } + ); +} + + +template +void ParquetFixedLenPlainReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + if constexpr (std::same_as> || std::same_as>) + { + readOverBigDecimal(col_ptr, null_map, num_values); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "unsupported type"); + } +} + +template +void ParquetFixedLenPlainReader::readOverBigDecimal( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData( + *assert_cast(col_ptr.get()), cursor + num_values); + + def_level_reader->visitNullableValues( + cursor, + num_values, + max_def_level, + null_map, + /* individual_visitor */ [&](size_t nest_cursor) + { + plain_data_buffer.readOverBigDecimal(column_data + nest_cursor, elem_bytes_num); + }, + /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) + { + auto col_data_pos = column_data + nest_cursor; + for (UInt32 i = 0; i < count; i++) + { + plain_data_buffer.readOverBigDecimal(col_data_pos + i, elem_bytes_num); + } + } + ); +} + + +template +void ParquetRleLCReader::readBatch( + MutableColumnPtr & index_col, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = index_col->size(); + auto * column_data = getResizedPrimitiveData(*assert_cast(index_col.get()), cursor + num_values); + + bool has_null = false; + + // in ColumnLowCardinality, first element in dictionary is null + // so we should increase each value by 1 in parquet index + auto val_getter = [&](Int32 val) { return val + 1; }; + + def_level_reader->visitNullableBySteps( + cursor, + num_values, + max_def_level, + /* individual_null_visitor */ [&](size_t nest_cursor) + { + column_data[nest_cursor] = 0; + has_null = true; + }, + /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) + { + rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); + }, + /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) + { + if (is_valid) + { + rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); + } + else + { + auto data_pos = column_data + nest_cursor; + std::fill(data_pos, data_pos + count, 0); + has_null = true; + } + } + ); + if (has_null) + { + null_map.setNull(0); + } +} + +template <> +void ParquetRleDictReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto & column = *assert_cast(col_ptr.get()); + auto cursor = column.size(); + std::vector value_cache; + + const auto & dict_chars = static_cast(page_dictionary).getChars(); + const auto & dict_offsets = static_cast(page_dictionary).getOffsets(); + + column.getOffsets().resize(cursor + num_values); + auto * offset_data = column.getOffsets().data(); + auto & chars = column.getChars(); + + auto append_nulls = [&](UInt8 num) + { + for (auto limit = cursor + num; cursor < limit; 
cursor++) + { + chars.push_back(0); + offset_data[cursor] = chars.size(); + null_map.setNull(cursor); + } + }; + + auto append_string = [&](Int32 dict_idx) + { + auto dict_chars_cursor = dict_offsets[dict_idx - 1]; + auto value_len = dict_offsets[dict_idx] - dict_chars_cursor; + auto chars_cursor = chars.size(); + chars.resize(chars_cursor + value_len); + + memcpySmallAllowReadWriteOverflow15(&chars[chars_cursor], &dict_chars[dict_chars_cursor], value_len); + offset_data[cursor] = chars.size(); + cursor++; + }; + + auto val_getter = [&](Int32 val) { return val + 1; }; + + def_level_reader->visitNullableBySteps( + cursor, + num_values, + max_def_level, + /* individual_null_visitor */ [&](size_t) {}, + /* stepped_valid_visitor */ [&](size_t, const std::vector & valid_index_steps) + { + value_cache.resize(valid_index_steps.size()); + rle_data_reader->setValues( + value_cache.data() + 1, static_cast(valid_index_steps.size() - 1), val_getter); + + append_nulls(valid_index_steps[0]); + for (size_t i = 1; i < valid_index_steps.size(); i++) + { + append_string(value_cache[i]); + append_nulls(valid_index_steps[i] - 1); + } + }, + /* repeated_visitor */ [&](bool is_valid, size_t, UInt32 count) + { + if (is_valid) + { + value_cache.resize(count); + rle_data_reader->setValues(value_cache.data(), count, val_getter); + for (UInt32 i = 0; i < count; i++) + { + append_string(value_cache[i]); + } + } + else + { + append_nulls(count); + } + } + ); +} + +template +void ParquetRleDictReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData(*assert_cast(col_ptr.get()), cursor + num_values); + const auto & dictionary_array = static_cast(page_dictionary).getData(); + + auto val_getter = [&](Int32 val) { return dictionary_array[val]; }; + def_level_reader->visitNullableBySteps( + cursor, + num_values, + max_def_level, + /* individual_null_visitor */ [&](size_t nest_cursor) + { + null_map.setNull(nest_cursor); + }, + /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) + { + rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); + }, + /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) + { + if (is_valid) + { + rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); + } + else + { + null_map.setNull(nest_cursor, count); + } + } + ); +} + + +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader>; +template class ParquetPlainValuesReader>; +template class ParquetPlainValuesReader>; +template class ParquetPlainValuesReader; + +template class ParquetFixedLenPlainReader>; +template class ParquetFixedLenPlainReader>; + +template class ParquetRleLCReader; +template class ParquetRleLCReader; +template class ParquetRleLCReader; + +template class ParquetRleDictReader; +template class ParquetRleDictReader; +template class ParquetRleDictReader; +template class ParquetRleDictReader; +template class ParquetRleDictReader; +template class ParquetRleDictReader; +template class ParquetRleDictReader>; +template class ParquetRleDictReader>; +template class ParquetRleDictReader>; +template class ParquetRleDictReader>; +template class ParquetRleDictReader>; +template 
class ParquetRleDictReader; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h new file mode 100644 index 00000000000..fbccb612b3c --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h @@ -0,0 +1,265 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include "ParquetDataBuffer.h" + +namespace DB +{ + +class RleValuesReader +{ +public: + RleValuesReader(std::unique_ptr bit_reader_, Int32 bit_width_); + + /** + * @brief Used when the bit_width is 0, so all elements have same value. + */ + explicit RleValuesReader(UInt32 total_size, Int32 val = 0) + : bit_reader(nullptr), bit_width(0), cur_group_size(total_size), cur_value(val), cur_group_is_packed(false) + {} + + void nextGroup(); + + void nextGroupIfNecessary() { if (cur_group_cursor >= cur_group_size) nextGroup(); } + + UInt32 curGroupLeft() const { return cur_group_size - cur_group_cursor; } + + /** + * @brief Visit num_values elements. + * For RLE encoding, for same group, the value is same, so they can be visited repeatedly. + * For BitPacked encoding, the values may be different with each other, so they must be visited individual. + * + * @tparam IndividualVisitor A callback with signature: void(Int32 val) + * @tparam RepeatedVisitor A callback with signature: void(UInt32 count, Int32 val) + */ + template + void visitValues(UInt32 num_values, IndividualVisitor && individual_visitor, RepeatedVisitor && repeated_visitor); + + /** + * @brief Visit num_values elements by parsed nullability. + * If the parsed value is same as max_def_level, then it is processed as null value. + * + * @tparam IndividualVisitor A callback with signature: void(size_t cursor) + * @tparam RepeatedVisitor A callback with signature: void(size_t cursor, UInt32 count) + * + * Because the null map is processed, so only the callbacks only need to process the valid data. + */ + template + void visitNullableValues( + size_t cursor, + UInt32 num_values, + Int32 max_def_level, + LazyNullMap & null_map, + IndividualVisitor && individual_visitor, + RepeatedVisitor && repeated_visitor); + + /** + * @brief Visit num_values elements by parsed nullability. + * It may be inefficient to process the valid data individually like in visitNullableValues, + * so a valid_index_steps index array is generated first, in order to process valid data continuously. + * + * @tparam IndividualNullVisitor A callback with signature: void(size_t cursor), used to process null value + * @tparam SteppedValidVisitor A callback with signature: + * void(size_t cursor, const std::vector & valid_index_steps) + * valid_index_steps records the gap size between two valid elements, + * i-th item in valid_index_steps describes how many elements there are + * from i-th valid element (include) to (i+1)-th valid element (exclude). + * + * take following BitPacked group values for example, and assuming max_def_level is 1: + * [1, 0, 1, 1, 0, 1 ] + * null valid null null valid null + * the second line shows the corresponding validation state, + * then the valid_index_steps has values [1, 3, 2]. + * Please note that the the sum of valid_index_steps is same as elements number in this group. 
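+     * (Check for the group above: 1 + 3 + 2 = 6, matching the six elements listed.)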
+ * TODO the definition of valid_index_steps should be updated when supporting nested types + * + * @tparam RepeatedVisitor A callback with signature: void(bool is_valid, UInt32 cursor, UInt32 count) + */ + template + void visitNullableBySteps( + size_t cursor, + UInt32 num_values, + Int32 max_def_level, + IndividualNullVisitor && null_visitor, + SteppedValidVisitor && stepped_valid_visitor, + RepeatedVisitor && repeated_visitor); + + /** + * @brief Set the Values to column_data directly + * + * @tparam TValue The type of column data. + * @tparam ValueGetter A callback with signature: TValue(Int32 val) + */ + template + void setValues(TValue * res_values, UInt32 num_values, ValueGetter && val_getter); + + /** + * @brief Set the value by valid_index_steps generated in visitNullableBySteps. + * According to visitNullableBySteps, the elements number is valid_index_steps.size()-1, + * so valid_index_steps.size()-1 elements are read, and set to column_data with steps in valid_index_steps + */ + template + void setValueBySteps( + TValue * res_values, + const std::vector & col_data_steps, + ValueGetter && val_getter); + +private: + std::unique_ptr bit_reader; + + std::vector cur_packed_bit_values; + std::vector valid_index_steps; + + const Int32 bit_width; + + UInt32 cur_group_size = 0; + UInt32 cur_group_cursor = 0; + Int32 cur_value; + bool cur_group_is_packed; +}; + +using RleValuesReaderPtr = std::unique_ptr; + + +class ParquetDataValuesReader +{ +public: + virtual void readBatch(MutableColumnPtr & column, LazyNullMap & null_map, UInt32 num_values) = 0; + + virtual ~ParquetDataValuesReader() = default; +}; + +using ParquetDataValuesReaderPtr = std::unique_ptr; + + +enum class ParquetReaderTypes +{ + Normal, + TimestampInt96, +}; + +/** + * The definition level is RLE or BitPacked encoding, while data is read directly + */ +template +class ParquetPlainValuesReader : public ParquetDataValuesReader +{ +public: + + ParquetPlainValuesReader( + Int32 max_def_level_, + std::unique_ptr def_level_reader_, + ParquetDataBuffer data_buffer_) + : max_def_level(max_def_level_) + , def_level_reader(std::move(def_level_reader_)) + , plain_data_buffer(std::move(data_buffer_)) + {} + + void readBatch(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) override; + +private: + Int32 max_def_level; + std::unique_ptr def_level_reader; + ParquetDataBuffer plain_data_buffer; +}; + +/** + * The data and definition level encoding are same as ParquetPlainValuesReader. + * But the element size is const and bigger than primitive data type. + */ +template +class ParquetFixedLenPlainReader : public ParquetDataValuesReader +{ +public: + + ParquetFixedLenPlainReader( + Int32 max_def_level_, + Int32 elem_bytes_num_, + std::unique_ptr def_level_reader_, + ParquetDataBuffer data_buffer_) + : max_def_level(max_def_level_) + , elem_bytes_num(elem_bytes_num_) + , def_level_reader(std::move(def_level_reader_)) + , plain_data_buffer(std::move(data_buffer_)) + {} + + void readOverBigDecimal(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values); + + void readBatch(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) override; + +private: + Int32 max_def_level; + Int32 elem_bytes_num; + std::unique_ptr def_level_reader; + ParquetDataBuffer plain_data_buffer; +}; + +/** + * Read data according to the format of ColumnLowCardinality format. + * + * Only index and null column are processed in this class. 
+ * And all null value is mapped to first index in dictionary, + * so the result index valued is added by one. +*/ +template +class ParquetRleLCReader : public ParquetDataValuesReader +{ +public: + ParquetRleLCReader( + Int32 max_def_level_, + std::unique_ptr def_level_reader_, + std::unique_ptr rle_data_reader_) + : max_def_level(max_def_level_) + , def_level_reader(std::move(def_level_reader_)) + , rle_data_reader(std::move(rle_data_reader_)) + {} + + void readBatch(MutableColumnPtr & index_col, LazyNullMap & null_map, UInt32 num_values) override; + +private: + Int32 max_def_level; + std::unique_ptr def_level_reader; + std::unique_ptr rle_data_reader; +}; + +/** + * The definition level is RLE or BitPacked encoded, + * and the index of dictionary is also RLE or BitPacked encoded. + * + * while the result is not parsed as a low cardinality column, + * instead, a normal column is generated. + */ +template +class ParquetRleDictReader : public ParquetDataValuesReader +{ +public: + ParquetRleDictReader( + Int32 max_def_level_, + std::unique_ptr def_level_reader_, + std::unique_ptr rle_data_reader_, + const IColumn & page_dictionary_) + : max_def_level(max_def_level_) + , def_level_reader(std::move(def_level_reader_)) + , rle_data_reader(std::move(rle_data_reader_)) + , page_dictionary(page_dictionary_) + {} + + void readBatch(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) override; + +private: + Int32 max_def_level; + std::unique_ptr def_level_reader; + std::unique_ptr rle_data_reader; + const IColumn & page_dictionary; +}; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp new file mode 100644 index 00000000000..9e1cae9bb65 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp @@ -0,0 +1,542 @@ +#include "ParquetLeafColReader.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; + extern const int PARQUET_EXCEPTION; +} + +namespace +{ + +template +void visitColStrIndexType(size_t data_size, TypeVisitor && visitor) +{ + // refer to: DataTypeLowCardinality::createColumnUniqueImpl + if (data_size < (1ull << 8)) + { + visitor(static_cast(nullptr)); + } + else if (data_size < (1ull << 16)) + { + visitor(static_cast(nullptr)); + } + else if (data_size < (1ull << 32)) + { + visitor(static_cast(nullptr)); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "unsupported data size {}", data_size); + } +} + +void reserveColumnStrRows(MutableColumnPtr & col, UInt64 rows_num) +{ + col->reserve(rows_num); + + /// Never reserve for too big size according to SerializationString::deserializeBinaryBulk + if (rows_num < 256 * 1024 * 1024) + { + try + { + static_cast(col.get())->getChars().reserve(rows_num); + } + catch (Exception & e) + { + e.addMessage("(limit = " + toString(rows_num) + ")"); + throw; + } + } +}; + + +template +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & col_des, + const DataTypePtr & /* data_type */); + +template <> +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & /* col_des */, + const DataTypePtr & /* data_type */) +{ + auto col = ColumnString::create(); + 
col->getOffsets().resize(page.num_values() + 1); + col->getChars().reserve(page.num_values()); + ParquetDataBuffer buffer(page.data(), page.size()); + + // will be read as low cardinality column + // in which case, the null key is set to first position, so the first string should be empty + col->getChars().push_back(0); + col->getOffsets()[0] = 1; + for (auto i = 1; i <= page.num_values(); i++) + { + buffer.readString(*col, i); + } + return col; +} + +template <> +ColumnPtr readDictPage>( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & col_des, + const DataTypePtr & data_type) +{ + + const auto & datetime_type = assert_cast(*data_type); + auto dict_col = ColumnDecimal::create(page.num_values(), datetime_type.getScale()); + auto * col_data = dict_col->getData().data(); + ParquetDataBuffer buffer(page.data(), page.size(), datetime_type.getScale()); + if (col_des.physical_type() == parquet::Type::INT64) + { + buffer.readBytes(dict_col->getData().data(), page.num_values() * sizeof(Int64)); + } + else + { + for (auto i = 0; i < page.num_values(); i++) + { + buffer.readDateTime64FromInt96(col_data[i]); + } + } + return dict_col; +} + +template +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & col_des, + const DataTypePtr & /* data_type */) +{ + auto dict_col = TColumnDecimal::create(page.num_values(), col_des.type_scale()); + auto * col_data = dict_col->getData().data(); + ParquetDataBuffer buffer(page.data(), page.size()); + for (auto i = 0; i < page.num_values(); i++) + { + buffer.readOverBigDecimal(col_data + i, col_des.type_length()); + } + return dict_col; +} + +template requires (!std::is_same_v) +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & col_des, + const DataTypePtr & /* data_type */) +{ + auto dict_col = TColumnDecimal::create(page.num_values(), col_des.type_scale()); + ParquetDataBuffer buffer(page.data(), page.size()); + buffer.readBytes(dict_col->getData().data(), page.num_values() * sizeof(typename TColumnDecimal::ValueType)); + return dict_col; +} + +template +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & /* col_des */, + const DataTypePtr & /* data_type */) +{ + auto dict_col = TColumnVector::create(page.num_values()); + ParquetDataBuffer buffer(page.data(), page.size()); + buffer.readBytes(dict_col->getData().data(), page.num_values() * sizeof(typename TColumnVector::ValueType)); + return dict_col; +} + + +template +std::unique_ptr createPlainReader( + const parquet::ColumnDescriptor & col_des, + RleValuesReaderPtr def_level_reader, + ParquetDataBuffer buffer); + +template +std::unique_ptr createPlainReader( + const parquet::ColumnDescriptor & col_des, + RleValuesReaderPtr def_level_reader, + ParquetDataBuffer buffer) +{ + return std::make_unique>( + col_des.max_definition_level(), + col_des.type_length(), + std::move(def_level_reader), + std::move(buffer)); +} + +template +std::unique_ptr createPlainReader( + const parquet::ColumnDescriptor & col_des, + RleValuesReaderPtr def_level_reader, + ParquetDataBuffer buffer) +{ + if (std::is_same_v> && col_des.physical_type() == parquet::Type::INT96) + return std::make_unique>( + col_des.max_definition_level(), std::move(def_level_reader), std::move(buffer)); + else + return std::make_unique>( + col_des.max_definition_level(), std::move(def_level_reader), std::move(buffer)); +} + + +} // anonymous namespace + + +template 
+ParquetLeafColReader::ParquetLeafColReader( + const parquet::ColumnDescriptor & col_descriptor_, + DataTypePtr base_type_, + std::unique_ptr meta_, + std::unique_ptr reader_) + : col_descriptor(col_descriptor_) + , base_data_type(base_type_) + , col_chunk_meta(std::move(meta_)) + , parquet_page_reader(std::move(reader_)) + , log(&Poco::Logger::get("ParquetLeafColReader")) +{ +} + +template +ColumnWithTypeAndName ParquetLeafColReader::readBatch(UInt64 rows_num, const String & name) +{ + reading_rows_num = rows_num; + auto readPageIfEmpty = [&]() + { + while (!cur_page_values) readPage(); + }; + + // make sure the dict page has been read, and the status is updated + readPageIfEmpty(); + resetColumn(rows_num); + + while (rows_num) + { + // if dictionary page encountered, another page should be read + readPageIfEmpty(); + + auto read_values = static_cast(std::min(rows_num, static_cast(cur_page_values))); + data_values_reader->readBatch(column, *null_map, read_values); + + cur_page_values -= read_values; + rows_num -= read_values; + } + + return releaseColumn(name); +} + +template <> +void ParquetLeafColReader::resetColumn(UInt64 rows_num) +{ + if (reading_low_cardinality) + { + assert(dictionary); + visitColStrIndexType(dictionary->size(), [&](TColVec *) + { + column = TColVec::create(); + }); + + // only first position is used + null_map = std::make_unique(1); + column->reserve(rows_num); + } + else + { + null_map = std::make_unique(rows_num); + column = ColumnString::create(); + reserveColumnStrRows(column, rows_num); + } +} + +template +void ParquetLeafColReader::resetColumn(UInt64 rows_num) +{ + assert(!reading_low_cardinality); + + column = base_data_type->createColumn(); + column->reserve(rows_num); + null_map = std::make_unique(rows_num); +} + +template +void ParquetLeafColReader::degradeDictionary() +{ + // if last batch read all dictionary indices, then degrade is not needed this time + if (!column) + { + dictionary = nullptr; + return; + } + assert(dictionary && !column->empty()); + + null_map = std::make_unique(reading_rows_num); + auto col_existing = std::move(column); + column = ColumnString::create(); + reserveColumnStrRows(column, reading_rows_num); + + ColumnString & col_dest = *static_cast(column.get()); + const ColumnString & col_dict_str = *static_cast(dictionary.get()); + + visitColStrIndexType(dictionary->size(), [&](TColVec *) + { + const TColVec & col_src = *static_cast(col_existing.get()); + + // It will be easier to create a ColumnLowCardinality and call convertToFullColumn() on it, + // while the performance loss is ignorable, the implementation can be updated next time. 
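+            // The loop below expands each dictionary index into a full ColumnString: rows whose
+            // index is 0 (the reserved null key) are marked as null, and every row gets the
+            // corresponding slice of the dictionary's char buffer copied into col_dest.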
+ col_dest.getOffsets().resize(col_src.size()); + for (size_t i = 0; i < col_src.size(); i++) + { + auto src_idx = col_src.getData()[i]; + if (0 == src_idx) + { + null_map->setNull(i); + } + auto dict_chars_cursor = col_dict_str.getOffsets()[src_idx - 1]; + auto str_len = col_dict_str.getOffsets()[src_idx] - dict_chars_cursor; + auto dst_chars_cursor = col_dest.getChars().size(); + col_dest.getChars().resize(dst_chars_cursor + str_len); + + memcpySmallAllowReadWriteOverflow15( + &col_dest.getChars()[dst_chars_cursor], &col_dict_str.getChars()[dict_chars_cursor], str_len); + col_dest.getOffsets()[i] = col_dest.getChars().size(); + } + }); + dictionary = nullptr; + LOG_DEBUG(log, "degraded dictionary to normal column"); +} + +template +ColumnWithTypeAndName ParquetLeafColReader::releaseColumn(const String & name) +{ + DataTypePtr data_type = base_data_type; + if (reading_low_cardinality) + { + MutableColumnPtr col_unique; + if (null_map->getNullableCol()) + { + data_type = std::make_shared(data_type); + col_unique = ColumnUnique::create(dictionary->assumeMutable(), true); + } + else + { + col_unique = ColumnUnique::create(dictionary->assumeMutable(), false); + } + column = ColumnLowCardinality::create(std::move(col_unique), std::move(column), true); + data_type = std::make_shared(data_type); + } + else + { + if (null_map->getNullableCol()) + { + column = ColumnNullable::create(std::move(column), null_map->getNullableCol()->assumeMutable()); + data_type = std::make_shared(data_type); + } + } + ColumnWithTypeAndName res = {std::move(column), data_type, name}; + column = nullptr; + null_map = nullptr; + + return res; +} + +template +void ParquetLeafColReader::readPage() +{ + // refer to: ColumnReaderImplBase::ReadNewPage in column_reader.cc + auto cur_page = parquet_page_reader->NextPage(); + switch (cur_page->type()) + { + case parquet::PageType::DATA_PAGE: + readPageV1(*std::static_pointer_cast(cur_page)); + break; + case parquet::PageType::DATA_PAGE_V2: + readPageV2(*std::static_pointer_cast(cur_page)); + break; + case parquet::PageType::DICTIONARY_PAGE: + { + const parquet::DictionaryPage & dict_page = *std::static_pointer_cast(cur_page); + if (unlikely( + dict_page.encoding() != parquet::Encoding::PLAIN_DICTIONARY + && dict_page.encoding() != parquet::Encoding::PLAIN)) + { + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Unsupported dictionary page encoding {}", dict_page.encoding()); + } + LOG_DEBUG(log, "{} values in dictionary page of column {}", dict_page.num_values(), col_descriptor.name()); + + dictionary = readDictPage(dict_page, col_descriptor, base_data_type); + if (unlikely(dictionary->size() < 2)) + { + // must not small than ColumnUnique::numSpecialValues() + dictionary->assumeMutable()->insertManyDefaults(2); + } + if (std::is_same_v) + { + reading_low_cardinality = true; + } + break; + } + default: + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported page type: {}", cur_page->type()); + } +} + +template +void ParquetLeafColReader::readPageV1(const parquet::DataPageV1 & page) +{ + static parquet::LevelDecoder repetition_level_decoder; + + cur_page_values = page.num_values(); + + // refer to: VectorizedColumnReader::readPageV1 in Spark and LevelDecoder::SetData in column_reader.cc + if (page.definition_level_encoding() != parquet::Encoding::RLE && col_descriptor.max_definition_level() != 0) + { + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Unsupported encoding: {}", page.definition_level_encoding()); + } + const auto * buffer = page.data(); + auto max_size = 
page.size(); + + if (col_descriptor.max_repetition_level() > 0) + { + auto rep_levels_bytes = repetition_level_decoder.SetData( + page.repetition_level_encoding(), col_descriptor.max_repetition_level(), 0, buffer, max_size); + buffer += rep_levels_bytes; + max_size -= rep_levels_bytes; + } + + assert(col_descriptor.max_definition_level() >= 0); + std::unique_ptr def_level_reader; + if (col_descriptor.max_definition_level() > 0) + { + auto bit_width = arrow::bit_util::Log2(col_descriptor.max_definition_level() + 1); + auto num_bytes = ::arrow::util::SafeLoadAs(buffer); + auto bit_reader = std::make_unique(buffer + 4, num_bytes); + num_bytes += 4; + buffer += num_bytes; + max_size -= num_bytes; + def_level_reader = std::make_unique(std::move(bit_reader), bit_width); + } + else + { + def_level_reader = std::make_unique(page.num_values()); + } + + switch (page.encoding()) + { + case parquet::Encoding::PLAIN: + { + if (reading_low_cardinality) + { + reading_low_cardinality = false; + degradeDictionary(); + } + + ParquetDataBuffer parquet_buffer = [&]() + { + if constexpr (!std::is_same_v, TColumn>) + return ParquetDataBuffer(buffer, max_size); + + auto scale = assert_cast(*base_data_type).getScale(); + return ParquetDataBuffer(buffer, max_size, scale); + }(); + data_values_reader = createPlainReader( + col_descriptor, std::move(def_level_reader), std::move(parquet_buffer)); + break; + } + case parquet::Encoding::RLE_DICTIONARY: + case parquet::Encoding::PLAIN_DICTIONARY: + { + if (unlikely(!dictionary)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "dictionary should be existed"); + } + + // refer to: DictDecoderImpl::SetData in encoding.cc + auto bit_width = *buffer; + auto bit_reader = std::make_unique(++buffer, --max_size); + data_values_reader = createDictReader( + std::move(def_level_reader), std::make_unique(std::move(bit_reader), bit_width)); + break; + } + case parquet::Encoding::BYTE_STREAM_SPLIT: + case parquet::Encoding::DELTA_BINARY_PACKED: + case parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY: + case parquet::Encoding::DELTA_BYTE_ARRAY: + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Unsupported encoding: {}", page.encoding()); + + default: + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Unknown encoding type: {}", page.encoding()); + } +} + +template +void ParquetLeafColReader::readPageV2(const parquet::DataPageV2 & /*page*/) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "read page V2 is not implemented yet"); +} + +template +std::unique_ptr ParquetLeafColReader::createDictReader( + std::unique_ptr def_level_reader, std::unique_ptr rle_data_reader) +{ + if (reading_low_cardinality && std::same_as) + { + std::unique_ptr res; + visitColStrIndexType(dictionary->size(), [&](TCol *) + { + res = std::make_unique>( + col_descriptor.max_definition_level(), + std::move(def_level_reader), + std::move(rle_data_reader)); + }); + return res; + } + return std::make_unique>( + col_descriptor.max_definition_level(), + std::move(def_level_reader), + std::move(rle_data_reader), + *assert_cast(dictionary.get())); +} + + +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; + +} diff --git 
a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h new file mode 100644 index 00000000000..c5b14132f17 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h @@ -0,0 +1,62 @@ +#pragma once + +#include +#include + +#include "ParquetColumnReader.h" +#include "ParquetDataValuesReader.h" + +namespace parquet +{ + +class ColumnDescriptor; + +} + + +namespace DB +{ + +template +class ParquetLeafColReader : public ParquetColumnReader +{ +public: + ParquetLeafColReader( + const parquet::ColumnDescriptor & col_descriptor_, + DataTypePtr base_type_, + std::unique_ptr meta_, + std::unique_ptr reader_); + + ColumnWithTypeAndName readBatch(UInt64 rows_num, const String & name) override; + +private: + const parquet::ColumnDescriptor & col_descriptor; + DataTypePtr base_data_type; + std::unique_ptr col_chunk_meta; + std::unique_ptr parquet_page_reader; + std::unique_ptr data_values_reader; + + MutableColumnPtr column; + std::unique_ptr null_map; + + ColumnPtr dictionary; + + UInt64 reading_rows_num = 0; + UInt32 cur_page_values = 0; + bool reading_low_cardinality = false; + + Poco::Logger * log; + + void resetColumn(UInt64 rows_num); + void degradeDictionary(); + ColumnWithTypeAndName releaseColumn(const String & name); + + void readPage(); + void readPageV1(const parquet::DataPageV1 & page); + void readPageV2(const parquet::DataPageV2 & page); + + std::unique_ptr createDictReader( + std::unique_ptr def_level_reader, std::unique_ptr rle_data_reader); +}; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp new file mode 100644 index 00000000000..a7e51f88b3c --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -0,0 +1,406 @@ +#include "ParquetRecordReader.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "ParquetLeafColReader.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int PARQUET_EXCEPTION; +} + +#define THROW_PARQUET_EXCEPTION(s) \ + do \ + { \ + try { (s); } \ + catch (const ::parquet::ParquetException & e) \ + { \ + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Parquet exception: {}", e.what()); \ + } \ + } while (false) + +namespace +{ + +std::unique_ptr createFileReader( + std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file, + std::shared_ptr metadata = nullptr) +{ + std::unique_ptr res; + THROW_PARQUET_EXCEPTION(res = parquet::ParquetFileReader::Open( + std::move(arrow_file), + parquet::default_reader_properties(), + metadata)); + return res; +} + +class ColReaderFactory +{ +public: + ColReaderFactory( + const parquet::ArrowReaderProperties & reader_properties_, + const parquet::ColumnDescriptor & col_descriptor_, + DataTypePtr ch_type_, + std::unique_ptr meta_, + std::unique_ptr page_reader_) + : reader_properties(reader_properties_) + , col_descriptor(col_descriptor_) + , ch_type(std::move(ch_type_)) + , meta(std::move(meta_)) + , page_reader(std::move(page_reader_)) {} + + std::unique_ptr makeReader(); + +private: + const parquet::ArrowReaderProperties & reader_properties; + const parquet::ColumnDescriptor & col_descriptor; + DataTypePtr ch_type; + std::unique_ptr meta; + std::unique_ptr page_reader; + + + UInt32 getScaleFromLogicalTimestamp(parquet::LogicalType::TimeUnit::unit 
tm_unit); + UInt32 getScaleFromArrowTimeUnit(arrow::TimeUnit::type tm_unit); + + std::unique_ptr fromInt32(); + std::unique_ptr fromInt64(); + std::unique_ptr fromByteArray(); + std::unique_ptr fromFLBA(); + + std::unique_ptr fromInt32INT(const parquet::IntLogicalType & int_type); + std::unique_ptr fromInt64INT(const parquet::IntLogicalType & int_type); + + template + auto makeLeafReader() + { + return std::make_unique>( + col_descriptor, std::make_shared(), std::move(meta), std::move(page_reader)); + } + + template + auto makeDecimalLeafReader() + { + auto data_type = std::make_shared>( + col_descriptor.type_precision(), col_descriptor.type_scale()); + return std::make_unique>>( + col_descriptor, std::move(data_type), std::move(meta), std::move(page_reader)); + } + + std::unique_ptr throwUnsupported(std::string msg = "") + { + throw Exception( + ErrorCodes::PARQUET_EXCEPTION, + "Unsupported logical type: {} and physical type: {} for field =={}=={}", + col_descriptor.logical_type()->ToString(), col_descriptor.physical_type(), col_descriptor.name(), msg); + } +}; + +UInt32 ColReaderFactory::getScaleFromLogicalTimestamp(parquet::LogicalType::TimeUnit::unit tm_unit) +{ + switch (tm_unit) + { + case parquet::LogicalType::TimeUnit::MILLIS: + return 3; + case parquet::LogicalType::TimeUnit::MICROS: + return 6; + case parquet::LogicalType::TimeUnit::NANOS: + return 9; + default: + throwUnsupported(PreformattedMessage::create(", invalid timestamp unit: {}", tm_unit)); + return 0; + } +} + +UInt32 ColReaderFactory::getScaleFromArrowTimeUnit(arrow::TimeUnit::type tm_unit) +{ + switch (tm_unit) + { + case arrow::TimeUnit::MILLI: + return 3; + case arrow::TimeUnit::MICRO: + return 6; + case arrow::TimeUnit::NANO: + return 9; + default: + throwUnsupported(PreformattedMessage::create(", invalid arrow time unit: {}", tm_unit)); + return 0; + } +} + +std::unique_ptr ColReaderFactory::fromInt32() +{ + switch (col_descriptor.logical_type()->type()) + { + case parquet::LogicalType::Type::INT: + return fromInt32INT(dynamic_cast(*col_descriptor.logical_type())); + case parquet::LogicalType::Type::NONE: + return makeLeafReader(); + case parquet::LogicalType::Type::DATE: + return makeLeafReader(); + case parquet::LogicalType::Type::DECIMAL: + return makeDecimalLeafReader(); + default: + return throwUnsupported(); + } +} + +std::unique_ptr ColReaderFactory::fromInt64() +{ + switch (col_descriptor.logical_type()->type()) + { + case parquet::LogicalType::Type::INT: + return fromInt64INT(dynamic_cast(*col_descriptor.logical_type())); + case parquet::LogicalType::Type::NONE: + return makeLeafReader(); + case parquet::LogicalType::Type::TIMESTAMP: + { + const auto & tm_type = dynamic_cast(*col_descriptor.logical_type()); + auto read_type = std::make_shared(getScaleFromLogicalTimestamp(tm_type.time_unit())); + return std::make_unique>>( + col_descriptor, std::move(read_type), std::move(meta), std::move(page_reader)); + } + case parquet::LogicalType::Type::DECIMAL: + return makeDecimalLeafReader(); + default: + return throwUnsupported(); + } +} + +std::unique_ptr ColReaderFactory::fromByteArray() +{ + switch (col_descriptor.logical_type()->type()) + { + case parquet::LogicalType::Type::STRING: + case parquet::LogicalType::Type::NONE: + return makeLeafReader(); + default: + return throwUnsupported(); + } +} + +std::unique_ptr ColReaderFactory::fromFLBA() +{ + switch (col_descriptor.logical_type()->type()) + { + case parquet::LogicalType::Type::DECIMAL: + { + if (col_descriptor.type_length() > 0) + { + if 
(col_descriptor.type_length() <= static_cast(sizeof(Decimal128))) + return makeDecimalLeafReader(); + else if (col_descriptor.type_length() <= static_cast(sizeof(Decimal256))) + return makeDecimalLeafReader(); + } + + return throwUnsupported(PreformattedMessage::create( + ", invalid type length: {}", col_descriptor.type_length())); + } + default: + return throwUnsupported(); + } +} + +std::unique_ptr ColReaderFactory::fromInt32INT(const parquet::IntLogicalType & int_type) +{ + switch (int_type.bit_width()) + { + case 32: + { + if (int_type.is_signed()) + return makeLeafReader(); + else + return makeLeafReader(); + } + default: + return throwUnsupported(PreformattedMessage::create(", bit width: {}", int_type.bit_width())); + } +} + +std::unique_ptr ColReaderFactory::fromInt64INT(const parquet::IntLogicalType & int_type) +{ + switch (int_type.bit_width()) + { + case 64: + { + if (int_type.is_signed()) + return makeLeafReader(); + else + return makeLeafReader(); + } + default: + return throwUnsupported(PreformattedMessage::create(", bit width: {}", int_type.bit_width())); + } +} + +// refer: GetArrowType method in schema_internal.cc of arrow +std::unique_ptr ColReaderFactory::makeReader() +{ + // this method should to be called only once for each instance + SCOPE_EXIT({ page_reader = nullptr; }); + assert(page_reader); + + switch (col_descriptor.physical_type()) + { + case parquet::Type::BOOLEAN: + break; + case parquet::Type::INT32: + return fromInt32(); + case parquet::Type::INT64: + return fromInt64(); + case parquet::Type::INT96: + { + DataTypePtr read_type = ch_type; + if (!isDateTime64(ch_type)) + { + auto scale = getScaleFromArrowTimeUnit(reader_properties.coerce_int96_timestamp_unit()); + read_type = std::make_shared(scale); + } + return std::make_unique>>( + col_descriptor, read_type, std::move(meta), std::move(page_reader)); + } + case parquet::Type::FLOAT: + return makeLeafReader(); + case parquet::Type::DOUBLE: + return makeLeafReader(); + case parquet::Type::BYTE_ARRAY: + return fromByteArray(); + case parquet::Type::FIXED_LEN_BYTE_ARRAY: + return fromFLBA(); + default: + break; + } + + return throwUnsupported(); +} + +} // anonymous namespace + +ParquetRecordReader::ParquetRecordReader( + Block header_, + parquet::ArrowReaderProperties reader_properties_, + std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file, + const FormatSettings & format_settings, + std::vector row_groups_indices_, + std::shared_ptr metadata) + : file_reader(createFileReader(std::move(arrow_file), std::move(metadata))) + , reader_properties(reader_properties_) + , header(std::move(header_)) + , max_block_size(format_settings.parquet.max_block_size) + , row_groups_indices(std::move(row_groups_indices_)) + , left_rows(getTotalRows(*file_reader->metadata())) +{ + log = &Poco::Logger::get("ParquetRecordReader"); + + std::unordered_map parquet_columns; + const auto * root = file_reader->metadata()->schema()->group_node(); + for (int i = 0; i < root->field_count(); ++i) + { + const auto & node = root->field(i); + parquet_columns.emplace(node->name(), node); + } + + parquet_col_indice.reserve(header.columns()); + column_readers.reserve(header.columns()); + for (const auto & col_with_name : header) + { + auto it = parquet_columns.find(col_with_name.name); + if (it == parquet_columns.end()) + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "no column with '{}' in parquet file", col_with_name.name); + + const auto & node = it->second; + if (!node->is_primitive()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, 
"arrays and maps are not implemented in native parquet reader"); + + auto idx = file_reader->metadata()->schema()->ColumnIndex(*node); + chassert(idx >= 0); + parquet_col_indice.push_back(idx); + } + if (reader_properties.pre_buffer()) + { + THROW_PARQUET_EXCEPTION(file_reader->PreBuffer( + row_groups_indices, parquet_col_indice, reader_properties.io_context(), reader_properties.cache_options())); + } +} + +Chunk ParquetRecordReader::readChunk() +{ + if (!left_rows) + { + return Chunk{}; + } + if (!cur_row_group_left_rows) + { + loadNextRowGroup(); + } + + Columns columns(header.columns()); + auto num_rows_read = std::min(max_block_size, cur_row_group_left_rows); + for (size_t i = 0; i < header.columns(); i++) + { + columns[i] = castColumn( + column_readers[i]->readBatch(num_rows_read, header.getByPosition(i).name), + header.getByPosition(i).type); + } + left_rows -= num_rows_read; + cur_row_group_left_rows -= num_rows_read; + + return Chunk{std::move(columns), num_rows_read}; +} + +void ParquetRecordReader::loadNextRowGroup() +{ + Stopwatch watch(CLOCK_MONOTONIC); + cur_row_group_reader = file_reader->RowGroup(row_groups_indices[next_row_group_idx]); + + column_readers.clear(); + for (size_t i = 0; i < parquet_col_indice.size(); i++) + { + ColReaderFactory factory( + reader_properties, + *file_reader->metadata()->schema()->Column(parquet_col_indice[i]), + header.getByPosition(i).type, + cur_row_group_reader->metadata()->ColumnChunk(parquet_col_indice[i]), + cur_row_group_reader->GetColumnPageReader(parquet_col_indice[i])); + column_readers.emplace_back(factory.makeReader()); + } + + auto duration = watch.elapsedNanoseconds() / 1e6; + LOG_DEBUG(log, "begin to read row group {} consumed {} ms", row_groups_indices[next_row_group_idx], duration); + + ++next_row_group_idx; + cur_row_group_left_rows = cur_row_group_reader->metadata()->num_rows(); +} + +Int64 ParquetRecordReader::getTotalRows(const parquet::FileMetaData & meta_data) +{ + Int64 res = 0; + for (auto idx : row_groups_indices) + { + res += meta_data.RowGroup(idx)->num_rows(); + } + return res; +} + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h new file mode 100644 index 00000000000..2f728a586a0 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +#include "ParquetColumnReader.h" + +namespace DB +{ + +class ParquetRecordReader +{ +public: + ParquetRecordReader( + Block header_, + parquet::ArrowReaderProperties reader_properties_, + std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file, + const FormatSettings & format_settings, + std::vector row_groups_indices_, + std::shared_ptr metadata = nullptr); + + Chunk readChunk(); + +private: + std::unique_ptr file_reader; + parquet::ArrowReaderProperties reader_properties; + + Block header; + + std::shared_ptr cur_row_group_reader; + ParquetColReaders column_readers; + + UInt64 max_block_size; + + std::vector parquet_col_indice; + std::vector row_groups_indices; + UInt64 left_rows; + UInt64 cur_row_group_left_rows = 0; + int next_row_group_idx = 0; + + Poco::Logger * log; + + void loadNextRowGroup(); + Int64 getTotalRows(const parquet::FileMetaData & meta_data); +}; + +} diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index d41cb3447de..7fc7b9c3cab 100644 --- 
a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -3,6 +3,7 @@ #if USE_PARQUET +#include #include #include #include @@ -23,6 +24,7 @@ #include #include #include +#include namespace CurrentMetrics { @@ -392,6 +394,8 @@ void ParquetBlockInputFormat::initializeIfNeeded() { if (std::exchange(is_initialized, true)) return; + if (format_settings.parquet.use_native_reader) + LOG_INFO(&Poco::Logger::get("ParquetBlockInputFormat"), "using native parquet reader"); // Create arrow file adapter. // TODO: Make the adapter do prefetching on IO threads, based on the full set of ranges that @@ -479,23 +483,43 @@ void ParquetBlockInputFormat::initializeRowGroupBatchReader(size_t row_group_bat if (metadata->writer_version().VersionLt(parquet::ApplicationVersion::PARQUET_816_FIXED_VERSION())) properties.set_pre_buffer(false); - parquet::arrow::FileReaderBuilder builder; - THROW_ARROW_NOT_OK( - builder.Open(arrow_file, /* not to be confused with ArrowReaderProperties */ parquet::default_reader_properties(), metadata)); - builder.properties(properties); - // TODO: Pass custom memory_pool() to enable memory accounting with non-jemalloc allocators. - THROW_ARROW_NOT_OK(builder.Build(&row_group_batch.file_reader)); + if (format_settings.parquet.use_native_reader) + { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" + if constexpr (std::endian::native != std::endian::little) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "parquet native reader only supports little endian system currently"); +#pragma clang diagnostic pop - THROW_ARROW_NOT_OK( - row_group_batch.file_reader->GetRecordBatchReader(row_group_batch.row_groups_idxs, column_indices, &row_group_batch.record_batch_reader)); + row_group_batch.native_record_reader = std::make_shared( + getPort().getHeader(), + std::move(properties), + arrow_file, + format_settings, + row_group_batch.row_groups_idxs); + } + else + { + parquet::arrow::FileReaderBuilder builder; + THROW_ARROW_NOT_OK( + builder.Open(arrow_file, /* not to be confused with ArrowReaderProperties */ parquet::default_reader_properties(), metadata)); + builder.properties(properties); + // TODO: Pass custom memory_pool() to enable memory accounting with non-jemalloc allocators. 
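// Illustrative sketch (not part of this patch): the size arithmetic that
// ParquetLeafColReader::readPageV1() earlier in this patch applies to the
// definition-level section of a DATA_PAGE v1. The levels are stored as an
// RLE/bit-packed hybrid run preceded by a 4-byte little-endian length, and the
// bit width is the number of bits needed to represent max_definition_level.
// All names below are invented; the snippet assumes a little-endian host,
// which the native reader requires anyway.
#include <cstdint>
#include <cstring>
#include <cstdio>

// Bits needed to store any value in [0, x - 1] (ceil(log2(x)) for x >= 1).
static uint32_t bitsForValues(uint32_t x)
{
    uint32_t bits = 0;
    while ((1u << bits) < x)
        ++bits;
    return bits;
}

// Returns the total number of bytes occupied by the definition levels
// (4-byte length prefix + RLE payload) and fills in the bit width.
static uint32_t defLevelSectionSize(const uint8_t * page_data, uint32_t max_def_level, uint32_t & bit_width)
{
    bit_width = bitsForValues(max_def_level + 1);           // e.g. 1 bit for a plain Nullable column
    uint32_t rle_bytes = 0;
    std::memcpy(&rle_bytes, page_data, sizeof(rle_bytes));  // the prefix the reader loads before the RLE data
    return 4 + rle_bytes;
}

int main()
{
    const uint8_t fake_page[] = {2, 0, 0, 0, 0x03, 0x01};   // "2 bytes of RLE data follow"
    uint32_t bit_width = 0;
    std::printf("%u bytes, bit width %u\n", defLevelSectionSize(fake_page, 1, bit_width), bit_width);
}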
+ THROW_ARROW_NOT_OK(builder.Build(&row_group_batch.file_reader)); - row_group_batch.arrow_column_to_ch_column = std::make_unique( - getPort().getHeader(), - "Parquet", - format_settings.parquet.allow_missing_columns, - format_settings.null_as_default, - format_settings.date_time_overflow_behavior, - format_settings.parquet.case_insensitive_column_matching); + THROW_ARROW_NOT_OK( + row_group_batch.file_reader->GetRecordBatchReader(row_group_batch.row_groups_idxs, column_indices, &row_group_batch.record_batch_reader)); + + row_group_batch.arrow_column_to_ch_column = std::make_unique( + getPort().getHeader(), + "Parquet", + format_settings.parquet.allow_missing_columns, + format_settings.null_as_default, + format_settings.date_time_overflow_behavior, + format_settings.parquet.case_insensitive_column_matching); + } } void ParquetBlockInputFormat::scheduleRowGroup(size_t row_group_batch_idx) @@ -561,6 +585,7 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_batch_idx, std::un lock.unlock(); auto end_of_row_group = [&] { + row_group_batch.native_record_reader.reset(); row_group_batch.arrow_column_to_ch_column.reset(); row_group_batch.record_batch_reader.reset(); row_group_batch.file_reader.reset(); @@ -573,35 +598,56 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_batch_idx, std::un // reached. Wake up read() instead. condvar.notify_all(); }; - - if (!row_group_batch.record_batch_reader) - initializeRowGroupBatchReader(row_group_batch_idx); - - auto batch = row_group_batch.record_batch_reader->Next(); - if (!batch.ok()) - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", batch.status().ToString()); - - if (!*batch) + auto get_pending_chunk = [&](size_t num_rows, Chunk chunk = {}) { - end_of_row_group(); - return; - } - - auto tmp_table = arrow::Table::FromRecordBatches({*batch}); - - size_t approx_chunk_original_size = static_cast(std::ceil(static_cast(row_group_batch.total_bytes_compressed) / row_group_batch.total_rows * (*tmp_table)->num_rows())); - PendingChunk res = { - .chunk = {}, - .block_missing_values = {}, - .chunk_idx = row_group_batch.next_chunk_idx, - .row_group_batch_idx = row_group_batch_idx, - .approx_original_chunk_size = approx_chunk_original_size + size_t approx_chunk_original_size = static_cast(std::ceil( + static_cast(row_group_batch.total_bytes_compressed) / row_group_batch.total_rows * num_rows)); + return PendingChunk{ + .chunk = std::move(chunk), + .block_missing_values = {}, + .chunk_idx = row_group_batch.next_chunk_idx, + .row_group_batch_idx = row_group_batch_idx, + .approx_original_chunk_size = approx_chunk_original_size + }; }; - /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. - /// Otherwise fill the missing columns with zero values of its type. - BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? 
&res.block_missing_values : nullptr; - res.chunk = row_group_batch.arrow_column_to_ch_column->arrowTableToCHChunk(*tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); + if (!row_group_batch.record_batch_reader && !row_group_batch.native_record_reader) + initializeRowGroupBatchReader(row_group_batch_idx); + + PendingChunk res; + if (format_settings.parquet.use_native_reader) + { + auto chunk = row_group_batch.native_record_reader->readChunk(); + if (!chunk) + { + end_of_row_group(); + return; + } + + // TODO support defaults_for_omitted_fields feature when supporting nested columns + auto num_rows = chunk.getNumRows(); + res = get_pending_chunk(num_rows, std::move(chunk)); + } + else + { + auto batch = row_group_batch.record_batch_reader->Next(); + if (!batch.ok()) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", batch.status().ToString()); + + if (!*batch) + { + end_of_row_group(); + return; + } + + auto tmp_table = arrow::Table::FromRecordBatches({*batch}); + res = get_pending_chunk((*tmp_table)->num_rows()); + + /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. + /// Otherwise fill the missing columns with zero values of its type. + BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &res.block_missing_values : nullptr; + res.chunk = row_group_batch.arrow_column_to_ch_column->arrowTableToCHChunk(*tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); + } lock.lock(); diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index fc7e8eef95f..d6591f5c0a3 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -16,6 +16,7 @@ namespace DB { class ArrowColumnToCHColumn; +class ParquetRecordReader; // Parquet files contain a metadata block with the following information: // * list of columns, @@ -210,6 +211,9 @@ private: std::vector row_groups_idxs; // These are only used by the decoding thread, so don't require locking the mutex. + // If use_native_reader, only native_record_reader is used; + // otherwise, only native_record_reader is not used. + std::shared_ptr native_record_reader; std::unique_ptr file_reader; std::shared_ptr record_batch_reader; std::unique_ptr arrow_column_to_ch_column; diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 5382527fcdc..4d67bc1a4e9 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -135,7 +135,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex /// If the key is not found, skip the value. NullOutput sink; - readEscapedStringInto(sink, *in); + readEscapedStringInto(sink, *in); } else { diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 09f8fa92e5f..6d4dcba9e60 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -10,6 +10,8 @@ #include #include #include +#include +#include "Formats/FormatSettings.h" namespace DB { @@ -28,7 +30,8 @@ static void checkForCarriageReturn(ReadBuffer & in) throw Exception(ErrorCodes::INCORRECT_DATA, "\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row." 
"\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format." " You must transform your file to Unix format." - "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r."); + "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r" + "\nor else enable setting 'input_format_tsv_crlf_end_of_line'"); } TabSeparatedRowInputFormat::TabSeparatedRowInputFormat( @@ -92,7 +95,12 @@ void TabSeparatedFormatReader::skipRowEndDelimiter() if (buf->eof()) return; - if (unlikely(first_row)) + if (format_settings.tsv.crlf_end_of_line_input) + { + if (*buf->position() == '\r') + ++buf->position(); + } + else if (unlikely(first_row)) { checkForCarriageReturn(*buf); first_row = false; @@ -105,14 +113,15 @@ template String TabSeparatedFormatReader::readFieldIntoString() { String field; + bool support_crlf = format_settings.tsv.crlf_end_of_line_input; if (is_raw) readString(field, *buf); else { if constexpr (read_string) - readEscapedString(field, *buf); + support_crlf ? readEscapedStringCRLF(field, *buf) : readEscapedString(field, *buf); else - readTSVField(field, *buf); + support_crlf ? readTSVFieldCRLF(field, *buf) : readTSVField(field, *buf); } return field; } @@ -123,7 +132,7 @@ void TabSeparatedFormatReader::skipField() if (is_raw) readStringInto(out, *buf); else - readEscapedStringInto(out, *buf); + format_settings.tsv.crlf_end_of_line_input ? readEscapedStringInto(out, *buf) : readEscapedStringInto(out, *buf); } void TabSeparatedFormatReader::skipHeaderRow() @@ -155,7 +164,7 @@ bool TabSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & t const SerializationPtr & serialization, bool is_last_file_column, const String & /*column_name*/) { const bool at_delimiter = !is_last_file_column && !buf->eof() && *buf->position() == '\t'; - const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n'); + const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n' || (format_settings.tsv.crlf_end_of_line_input && *buf->position() == '\r')); if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end)) { @@ -220,7 +229,10 @@ bool TabSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) try { - assertChar('\n', *buf); + if (!format_settings.tsv.crlf_end_of_line_input) + assertChar('\n', *buf); + else + assertChar('\r', *buf); } catch (const DB::Exception &) { @@ -233,7 +245,10 @@ bool TabSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) else if (*buf->position() == '\r') { out << "ERROR: Carriage return found where line feed is expected." - " It's like your file has DOS/Windows style line separators, that is illegal in TabSeparated format.\n"; + " It's like your file has DOS/Windows style line separators. \n" + "You must transform your file to Unix format. 
\n" + "But if you really need carriage return at end of string value of last column, you need to escape it as \\r \n" + "or else enable setting 'input_format_tsv_crlf_end_of_line'"; } else { @@ -348,7 +363,7 @@ void TabSeparatedFormatReader::skipRow() bool TabSeparatedFormatReader::checkForEndOfRow() { - return buf->eof() || *buf->position() == '\n'; + return buf->eof() || *buf->position() == '\n' || (format_settings.tsv.crlf_end_of_line_input && *buf->position() == '\r'); } TabSeparatedSchemaReader::TabSeparatedSchemaReader( diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h index 38870473289..9a7bc03ea78 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -84,7 +84,7 @@ public: void readPrefix(); void skipField(EscapingRule escaping_rule); - inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); } + void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); } template ReturnType tryReadPrefixOrSuffix(size_t & input_part_beg, size_t input_part_end); diff --git a/src/Processors/IInflatingTransform.cpp b/src/Processors/IInflatingTransform.cpp index ffa5b55dc76..a59eda0feb2 100644 --- a/src/Processors/IInflatingTransform.cpp +++ b/src/Processors/IInflatingTransform.cpp @@ -45,8 +45,13 @@ IInflatingTransform::Status IInflatingTransform::prepare() { if (input.isFinished()) { - output.finish(); - return Status::Finished; + if (is_finished) + { + output.finish(); + return Status::Finished; + } + is_finished = true; + return Status::Ready; } input.setNeeded(); @@ -73,6 +78,14 @@ void IInflatingTransform::work() generated = true; can_generate = canGenerate(); } + else if (is_finished) + { + if (can_generate || generated || has_input) + throw Exception(ErrorCodes::LOGICAL_ERROR, "IInflatingTransform cannot finish work because it has generated data or has input data"); + + current_chunk = getRemaining(); + generated = !current_chunk.empty(); + } else { if (!has_input) diff --git a/src/Processors/IInflatingTransform.h b/src/Processors/IInflatingTransform.h index 0ad12f6cd65..0cb7fc06cc4 100644 --- a/src/Processors/IInflatingTransform.h +++ b/src/Processors/IInflatingTransform.h @@ -10,13 +10,14 @@ namespace DB /// for (chunk : input_chunks) /// { /// transform.consume(chunk); -/// /// while (transform.canGenerate()) /// { /// transformed_chunk = transform.generate(); /// ... (process transformed chunk) /// } /// } +/// transformed_chunk = transform.getRemaining(); +/// ... (process remaining data) /// class IInflatingTransform : public IProcessor { @@ -32,6 +33,7 @@ protected: virtual void consume(Chunk chunk) = 0; virtual bool canGenerate() = 0; virtual Chunk generate() = 0; + virtual Chunk getRemaining() { return {}; } public: IInflatingTransform(Block input_header, Block output_header); @@ -41,6 +43,9 @@ public: InputPort & getInputPort() { return input; } OutputPort & getOutputPort() { return output; } + + /// canGenerate can flush data when input is finished. 
+ bool is_finished = false; }; } diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index 3bd0b532d90..a77bb0dabfc 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -70,34 +70,12 @@ static AggregatingSortedAlgorithm::ColumnsDefinition defineColumns( return def; } -static MutableColumns getMergedColumns(const Block & header, const AggregatingSortedAlgorithm::ColumnsDefinition & def) -{ - MutableColumns columns; - columns.resize(header.columns()); - - for (const auto & desc : def.columns_to_simple_aggregate) - { - const auto & type = desc.nested_type ? desc.nested_type - : desc.real_type; - columns[desc.column_number] = type->createColumn(); - } - - for (size_t i = 0; i < columns.size(); ++i) - if (!columns[i]) - columns[i] = header.getByPosition(i).type->createColumn(); - - return columns; -} - /// Remove constants and LowCardinality for SimpleAggregateFunction static void preprocessChunk(Chunk & chunk, const AggregatingSortedAlgorithm::ColumnsDefinition & def) { auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - for (const auto & desc : def.columns_to_simple_aggregate) if (desc.nested_type) columns[desc.column_number] = recursiveRemoveLowCardinality(columns[desc.column_number]); @@ -159,12 +137,24 @@ AggregatingSortedAlgorithm::SimpleAggregateDescription::~SimpleAggregateDescript AggregatingSortedAlgorithm::AggregatingMergedData::AggregatingMergedData( - MutableColumns columns_, UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_) - : MergedData(std::move(columns_), false, max_block_size_rows_, max_block_size_bytes_), def(def_) + : MergedData(false, max_block_size_rows_, max_block_size_bytes_), def(def_) { +} + +void AggregatingSortedAlgorithm::AggregatingMergedData::initialize(const DB::Block & header, const IMergingAlgorithm::Inputs & inputs) +{ + MergedData::initialize(header, inputs); + + for (const auto & desc : def.columns_to_simple_aggregate) + { + const auto & type = desc.nested_type ? desc.nested_type + : desc.real_type; + columns[desc.column_number] = type->createColumn(); + } + initAggregateDescription(); /// Just to make startGroup() simpler. 
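// Illustrative sketch (not part of this patch): the refactoring pattern used
// in AggregatingMergedData above - merged-output columns are no longer passed
// into the constructor but created in a virtual initialize(header, inputs), so
// a subclass can replace the type of individual columns (e.g. use the nested
// type for an aggregated column). All names below are invented.
#include <string>
#include <vector>
#include <cstdio>

struct ToyHeader { std::vector<std::string> column_types; };

struct ToyMergedData
{
    std::vector<std::string> columns;   // stands in for MutableColumns

    virtual void initialize(const ToyHeader & header)
    {
        columns = header.column_types;  // default: clone empty columns from the header
    }
    virtual ~ToyMergedData() = default;
};

struct ToyAggregatingMergedData : ToyMergedData
{
    void initialize(const ToyHeader & header) override
    {
        ToyMergedData::initialize(header);
        columns[0] = "nested(" + columns[0] + ")";   // specialize the first (aggregated) column
    }
};

int main()
{
    ToyHeader header{{"SimpleAggregateFunction(sum, UInt64)", "String"}};
    ToyAggregatingMergedData data;
    data.initialize(header);
    for (const auto & c : data.columns)
        std::printf("%s\n", c.c_str());
}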
@@ -267,12 +257,15 @@ AggregatingSortedAlgorithm::AggregatingSortedAlgorithm( size_t max_block_size_bytes_) : IMergingAlgorithmWithDelayedChunk(header_, num_inputs, description_) , columns_definition(defineColumns(header_, description_)) - , merged_data(getMergedColumns(header_, columns_definition), max_block_size_rows_, max_block_size_bytes_, columns_definition) + , merged_data(max_block_size_rows_, max_block_size_bytes_, columns_definition) { } void AggregatingSortedAlgorithm::initialize(Inputs inputs) { + removeConstAndSparse(inputs); + merged_data.initialize(header, inputs); + for (auto & input : inputs) if (input.chunk) preprocessChunk(input.chunk, columns_definition); @@ -282,6 +275,7 @@ void AggregatingSortedAlgorithm::initialize(Inputs inputs) void AggregatingSortedAlgorithm::consume(Input & input, size_t source_num) { + removeConstAndSparse(input); preprocessChunk(input.chunk, columns_definition); updateCursor(input, source_num); } diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h index db8ee66ab2b..53c103e7038 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h @@ -102,11 +102,12 @@ private: public: AggregatingMergedData( - MutableColumns columns_, UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_); + void initialize(const Block & header, const IMergingAlgorithm::Inputs & inputs) override; + /// Group is a group of rows with the same sorting key. It represents single row in result. /// Algorithm is: start group, add several rows, finish group. /// Then pull chunk when enough groups were added. diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp index 8948cee217c..07ee8f4ddef 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp @@ -31,8 +31,13 @@ CollapsingSortedAlgorithm::CollapsingSortedAlgorithm( LoggerPtr log_, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) - : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs) - , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_) + : IMergingAlgorithmWithSharedChunks( + header_, + num_inputs, + std::move(description_), + out_row_sources_buf_, + max_row_refs, + std::make_unique(use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_)) , sign_column_number(header_.getPositionByName(sign_column)) , only_positive_sign(only_positive_sign_) , log(log_) @@ -65,7 +70,7 @@ void CollapsingSortedAlgorithm::reportIncorrectData() void CollapsingSortedAlgorithm::insertRow(RowRef & row) { - merged_data.insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows()); + merged_data->insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows()); } std::optional CollapsingSortedAlgorithm::insertRows() @@ -90,8 +95,8 @@ std::optional CollapsingSortedAlgorithm::insertRows() if (count_positive >= count_negative) { - if (merged_data.hasEnoughRows()) - res = merged_data.pull(); + if (merged_data->hasEnoughRows()) + res = merged_data->pull(); insertRow(last_positive_row); @@ -121,8 +126,8 @@ std::optional CollapsingSortedAlgorithm::insertRows() IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() { 
/// Rare case, which may happen when index_granularity is 1, but we needed to insert 2 rows inside insertRows(). - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); /// Take rows in required order and put them into `merged_data`, while the rows are no more than `max_block_size` while (queue.isValid()) @@ -148,8 +153,8 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() if (key_differs) { /// if there are enough rows and the last one is calculated completely - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); /// We write data for the previous primary key. auto res = insertRows(); @@ -220,7 +225,7 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() return Status(std::move(*res)); } - return Status(merged_data.pull(), true); + return Status(merged_data->pull(), true); } } diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h index be1a3a3bf33..99fd95d82d9 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h @@ -42,8 +42,6 @@ public: Status merge() override; private: - MergedData merged_data; - const size_t sign_column_number; const bool only_positive_sign; diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index a5befca7233..466adf93538 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -40,6 +40,7 @@ FinishAggregatingInOrderAlgorithm::FinishAggregatingInOrderAlgorithm( void FinishAggregatingInOrderAlgorithm::initialize(Inputs inputs) { + removeConstAndSparse(inputs); current_inputs = std::move(inputs); states.resize(num_inputs); for (size_t i = 0; i < num_inputs; ++i) @@ -48,6 +49,7 @@ void FinishAggregatingInOrderAlgorithm::initialize(Inputs inputs) void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num) { + removeConstAndSparse(input); if (!input.chunk.hasRows()) return; diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index 814625d7aee..2b891592b20 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -46,8 +46,8 @@ GraphiteRollupSortedAlgorithm::GraphiteRollupSortedAlgorithm( size_t max_block_size_bytes_, Graphite::Params params_, time_t time_of_merge_) - : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), nullptr, max_row_refs) - , merged_data(header_.cloneEmptyColumns(), false, max_block_size_rows_, max_block_size_bytes_) + : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), nullptr, max_row_refs, std::make_unique(false, max_block_size_rows_, max_block_size_bytes_)) + , graphite_rollup_merged_data(assert_cast(*merged_data)) , params(std::move(params_)) , time_of_merge(time_of_merge_) { @@ -63,7 +63,7 @@ GraphiteRollupSortedAlgorithm::GraphiteRollupSortedAlgorithm( } } - merged_data.allocMemForAggregates(max_size_of_aggregate_state, max_alignment_of_aggregate_state); + 
graphite_rollup_merged_data.allocMemForAggregates(max_size_of_aggregate_state, max_alignment_of_aggregate_state); columns_definition = defineColumns(header_, params); } @@ -113,7 +113,7 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() const DateLUTImpl & date_lut = timezone ? timezone->getTimeZone() : DateLUT::instance(); - /// Take rows in needed order and put them into `merged_data` until we get `max_block_size` rows. + /// Take rows in needed order and put them into `graphite_rollup_merged_data` until we get `max_block_size` rows. /// /// Variables starting with current_* refer to the rows previously popped from the queue that will /// contribute towards current output row. @@ -142,10 +142,10 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() if (is_new_key) { /// Accumulate the row that has maximum version in the previous group of rows with the same key: - if (merged_data.wasGroupStarted()) + if (graphite_rollup_merged_data.wasGroupStarted()) accumulateRow(current_subgroup_newest_row); - Graphite::RollupRule next_rule = merged_data.currentRule(); + Graphite::RollupRule next_rule = graphite_rollup_merged_data.currentRule(); if (new_path) next_rule = selectPatternForPath(this->params, next_path); @@ -167,15 +167,15 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() if (will_be_new_key) { - if (merged_data.wasGroupStarted()) + if (graphite_rollup_merged_data.wasGroupStarted()) { finishCurrentGroup(); /// We have enough rows - return, but don't advance the loop. At the beginning of the /// next call to merge() the same next_cursor will be processed once more and /// the next output row will be created from it. - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (graphite_rollup_merged_data.hasEnoughRows()) + return Status(graphite_rollup_merged_data.pull()); } /// At this point previous row has been fully processed, so we can advance the loop @@ -218,28 +218,28 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() } /// Write result row for the last group. 
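// Illustrative sketch (not part of this patch): the start-group / accumulate /
// finish-group control flow that merge() follows here, reduced to summing
// values per key over an already sorted stream. Toy types, invented names.
#include <cstdio>
#include <vector>

struct Row { int key; double value; };

int main()
{
    std::vector<Row> sorted_rows = {{1, 1.0}, {1, 2.5}, {2, 4.0}};

    bool group_started = false;
    int current_key = 0;
    double accumulated = 0;

    auto finish_group = [&] { std::printf("key %d -> %g\n", current_key, accumulated); };

    for (const Row & row : sorted_rows)
    {
        if (!group_started || row.key != current_key)
        {
            if (group_started)
                finish_group();                   // write result row for the previous group
            group_started = true;
            current_key = row.key;
            accumulated = 0;
        }
        accumulated += row.value;                 // accumulateRow()
    }
    if (group_started)
        finish_group();                           // write result row for the last group
}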
- if (merged_data.wasGroupStarted()) + if (graphite_rollup_merged_data.wasGroupStarted()) { accumulateRow(current_subgroup_newest_row); finishCurrentGroup(); } - return Status(merged_data.pull(), true); + return Status(graphite_rollup_merged_data.pull(), true); } void GraphiteRollupSortedAlgorithm::startNextGroup(SortCursor & cursor, Graphite::RollupRule next_rule) { - merged_data.startNextGroup(cursor->all_columns, cursor->getRow(), next_rule, columns_definition); + graphite_rollup_merged_data.startNextGroup(cursor->all_columns, cursor->getRow(), next_rule, columns_definition); } void GraphiteRollupSortedAlgorithm::finishCurrentGroup() { - merged_data.insertRow(current_time_rounded, current_subgroup_newest_row, columns_definition); + graphite_rollup_merged_data.insertRow(current_time_rounded, current_subgroup_newest_row, columns_definition); } void GraphiteRollupSortedAlgorithm::accumulateRow(RowRef & row) { - merged_data.accumulateRow(row, columns_definition); + graphite_rollup_merged_data.accumulateRow(row, columns_definition); } void GraphiteRollupSortedAlgorithm::GraphiteRollupMergedData::startNextGroup( diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h index a20a6eaf11f..aaa3859efb6 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h @@ -53,7 +53,7 @@ public: { public: using MergedData::MergedData; - ~GraphiteRollupMergedData(); + ~GraphiteRollupMergedData() override; void startNextGroup(const ColumnRawPtrs & raw_columns, size_t row, Graphite::RollupRule next_rule, ColumnsDefinition & def); @@ -72,7 +72,7 @@ public: }; private: - GraphiteRollupMergedData merged_data; + GraphiteRollupMergedData & graphite_rollup_merged_data; const Graphite::Params params; ColumnsDefinition columns_definition; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h index 6e352c3f104..9a1c7c24270 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h @@ -39,7 +39,6 @@ public: void set(Chunk chunk_) { - convertToFullIfSparse(chunk_); chunk = std::move(chunk_); skip_last_row = false; } @@ -47,6 +46,18 @@ public: using Inputs = std::vector; + static void removeConstAndSparse(Input & input) + { + convertToFullIfConst(input.chunk); + convertToFullIfSparse(input.chunk); + } + + static void removeConstAndSparse(Inputs & inputs) + { + for (auto & input : inputs) + removeConstAndSparse(input); + } + virtual const char * getName() const = 0; virtual void initialize(Inputs inputs) = 0; virtual void consume(Input & input, size_t source_num) = 0; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h index b8e73aec0dc..cf4b8589441 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h @@ -34,9 +34,9 @@ protected: return !lhs.hasEqualSortColumnsWith(rhs); } -private: Block header; +private: /// Inputs currently being merged. 
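// Illustrative sketch (not part of this patch): why inputs are expanded with
// removeConstAndSparse() before merging. A merge cursor indexes rows
// positionally, so a column stored as a single constant value (or as a sparse
// set of non-default values) is first materialized into a full column.
// The ConstColumn type below is invented for illustration.
#include <cstdio>
#include <vector>

struct ConstColumn
{
    int value = 0;
    size_t size = 0;
};

// Materialize the compressed representation into one value per row.
static std::vector<int> convertToFull(const ConstColumn & col)
{
    return std::vector<int>(col.size, col.value);
}

int main()
{
    ConstColumn c{42, 3};
    std::vector<int> full = convertToFull(c);
    // Now full[row] is valid for every row, which is what SortCursorImpl-style
    // positional access needs during the merge.
    for (size_t row = 0; row < full.size(); ++row)
        std::printf("row %zu -> %d\n", row, full[row]);
}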
Inputs current_inputs; SortCursorImpls cursors; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp index c8b69382e89..47b7ddf38dc 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp @@ -5,7 +5,7 @@ namespace DB { IMergingAlgorithmWithSharedChunks::IMergingAlgorithmWithSharedChunks( - Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs) + Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs, std::unique_ptr merged_data_) : header(std::move(header_)) , description(std::move(description_)) , chunk_allocator(num_inputs + max_row_refs) @@ -13,28 +13,20 @@ IMergingAlgorithmWithSharedChunks::IMergingAlgorithmWithSharedChunks( , sources(num_inputs) , sources_origin_merge_tree_part_level(num_inputs) , out_row_sources_buf(out_row_sources_buf_) + , merged_data(std::move(merged_data_)) { } -static void prepareChunk(Chunk & chunk) -{ - auto num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - - chunk.setColumns(std::move(columns), num_rows); -} - void IMergingAlgorithmWithSharedChunks::initialize(Inputs inputs) { + removeConstAndSparse(inputs); + merged_data->initialize(header, inputs); + for (size_t source_num = 0; source_num < inputs.size(); ++source_num) { if (!inputs[source_num].chunk) continue; - prepareChunk(inputs[source_num].chunk); - auto & source = sources[source_num]; source.skip_last_row = inputs[source_num].skip_last_row; @@ -52,7 +44,7 @@ void IMergingAlgorithmWithSharedChunks::initialize(Inputs inputs) void IMergingAlgorithmWithSharedChunks::consume(Input & input, size_t source_num) { - prepareChunk(input.chunk); + removeConstAndSparse(input); auto & source = sources[source_num]; source.skip_last_row = input.skip_last_row; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h index 3b4f9e92c5d..bc1aafe93f7 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include namespace DB @@ -10,7 +11,7 @@ class IMergingAlgorithmWithSharedChunks : public IMergingAlgorithm { public: IMergingAlgorithmWithSharedChunks( - Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs); + Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs, std::unique_ptr merged_data_); void initialize(Inputs inputs) override; void consume(Input & input, size_t source_num) override; @@ -25,7 +26,6 @@ private: SortCursorImpls cursors; protected: - struct Source { detail::SharedChunkPtr chunk; @@ -43,6 +43,8 @@ protected: /// If it is not nullptr then it should be populated during execution WriteBuffer * out_row_sources_buf = nullptr; + std::unique_ptr merged_data; + using RowRef = detail::RowRefWithOwnedChunk; void setRowRef(RowRef & row, SortCursor & cursor) { row.set(cursor, sources[cursor.impl->order].chunk); } bool skipLastRowFor(size_t input_number) const { return 
sources[input_number].skip_last_row; } diff --git a/src/Processors/Merges/Algorithms/MergedData.h b/src/Processors/Merges/Algorithms/MergedData.h index 7ffde835ad0..c5bb074bb0c 100644 --- a/src/Processors/Merges/Algorithms/MergedData.h +++ b/src/Processors/Merges/Algorithms/MergedData.h @@ -1,7 +1,9 @@ #pragma once #include +#include #include +#include #include #include @@ -19,17 +21,40 @@ namespace ErrorCodes class MergedData { public: - explicit MergedData(MutableColumns columns_, bool use_average_block_size_, UInt64 max_block_size_, UInt64 max_block_size_bytes_) - : columns(std::move(columns_)), max_block_size(max_block_size_), max_block_size_bytes(max_block_size_bytes_), use_average_block_size(use_average_block_size_) + explicit MergedData(bool use_average_block_size_, UInt64 max_block_size_, UInt64 max_block_size_bytes_) + : max_block_size(max_block_size_), max_block_size_bytes(max_block_size_bytes_), use_average_block_size(use_average_block_size_) { } + virtual void initialize(const Block & header, const IMergingAlgorithm::Inputs & inputs) + { + columns = header.cloneEmptyColumns(); + std::vector source_columns; + source_columns.resize(columns.size()); + for (const auto & input : inputs) + { + if (!input.chunk) + continue; + + const auto & input_columns = input.chunk.getColumns(); + for (size_t i = 0; i != input_columns.size(); ++i) + source_columns[i].push_back(input_columns[i]); + } + + for (size_t i = 0; i != columns.size(); ++i) + { + if (columns[i]->hasDynamicStructure()) + columns[i]->takeDynamicStructureFromSourceColumns(source_columns[i]); + } + } + /// Pull will be called at next prepare call. void flush() { need_flush = true; } void insertRow(const ColumnRawPtrs & raw_columns, size_t row, size_t block_size) { size_t num_columns = raw_columns.size(); + chassert(columns.size() == num_columns); for (size_t i = 0; i < num_columns; ++i) columns[i]->insertFrom(*raw_columns[i], row); @@ -41,6 +66,7 @@ public: void insertRows(const ColumnRawPtrs & raw_columns, size_t start_index, size_t length, size_t block_size) { size_t num_columns = raw_columns.size(); + chassert(columns.size() == num_columns); for (size_t i = 0; i < num_columns; ++i) { if (length == 1) @@ -61,6 +87,7 @@ public: UInt64 num_rows = chunk.getNumRows(); UInt64 num_columns = chunk.getNumColumns(); + chassert(columns.size() == num_columns); auto chunk_columns = chunk.mutateColumns(); /// Here is a special code for constant columns. @@ -69,9 +96,21 @@ public: for (size_t i = 0; i < num_columns; ++i) { if (isColumnConst(*columns[i])) + { columns[i] = columns[i]->cloneResized(num_rows); + } + /// For columns with Dynamic structure we cannot just take column from input chunk because resulting column may have + /// different Dynamic structure (and have some merge statistics after calling takeDynamicStructureFromSourceColumns). + /// We should insert into data resulting column using insertRangeFrom. 
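// Illustrative sketch (not part of this patch): the distinction spelled out in
// the comment above - a column whose structure was merged from several sources
// cannot simply adopt an input column; the rows have to be copied into a fresh
// column that already carries the merged structure. Toy types, invented names.
#include <cstdio>
#include <string>
#include <vector>

struct ToyDynamicColumn
{
    std::string structure;          // e.g. the set of variant types seen so far
    std::vector<int> rows;

    void insertRangeFrom(const ToyDynamicColumn & src, size_t from, size_t length)
    {
        // The real insertRangeFrom re-encodes values into this column's structure;
        // the toy just appends.
        rows.insert(rows.end(), src.rows.begin() + from, src.rows.begin() + from + length);
    }
};

int main()
{
    ToyDynamicColumn merged_structure{"Int64|String", {}};    // after takeDynamicStructureFromSourceColumns
    ToyDynamicColumn input{"Int64", {1, 2, 3}};

    // Wrong: merged = std::move(input); would silently narrow the structure.
    ToyDynamicColumn merged = merged_structure;                // cloneEmpty() keeps the merged structure
    merged.insertRangeFrom(input, 0, input.rows.size());
    std::printf("structure %s, %zu rows\n", merged.structure.c_str(), merged.rows.size());
}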
+ else if (columns[i]->hasDynamicStructure()) + { + columns[i] = columns[i]->cloneEmpty(); + columns[i]->insertRangeFrom(*chunk_columns[i], 0, num_rows); + } else + { columns[i] = std::move(chunk_columns[i]); + } } if (rows_size < num_rows) @@ -144,6 +183,8 @@ public: UInt64 totalAllocatedBytes() const { return total_allocated_bytes; } UInt64 maxBlockSize() const { return max_block_size; } + virtual ~MergedData() = default; + protected: MutableColumns columns; diff --git a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp index 408d9a16c31..3a9cf7ee141 100644 --- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp @@ -18,7 +18,7 @@ MergingSortedAlgorithm::MergingSortedAlgorithm( WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) : header(std::move(header_)) - , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size_, max_block_size_bytes_) + , merged_data(use_average_block_sizes, max_block_size_, max_block_size_bytes_) , description(description_) , limit(limit_) , out_row_sources_buf(out_row_sources_buf_) @@ -49,16 +49,16 @@ void MergingSortedAlgorithm::addInput() void MergingSortedAlgorithm::initialize(Inputs inputs) { + removeConstAndSparse(inputs); + merged_data.initialize(header, inputs); current_inputs = std::move(inputs); for (size_t source_num = 0; source_num < current_inputs.size(); ++source_num) { auto & chunk = current_inputs[source_num].chunk; - if (!chunk) continue; - convertToFullIfConst(chunk); cursors[source_num] = SortCursorImpl(header, chunk.getColumns(), description, source_num); } @@ -82,7 +82,7 @@ void MergingSortedAlgorithm::initialize(Inputs inputs) void MergingSortedAlgorithm::consume(Input & input, size_t source_num) { - convertToFullIfConst(input.chunk); + removeConstAndSparse(input); current_inputs[source_num].swap(input); cursors[source_num].reset(current_inputs[source_num].chunk.getColumns(), header); diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index 9e5c1249c4e..7b2c7d82a01 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -41,9 +41,8 @@ ReplacingSortedAlgorithm::ReplacingSortedAlgorithm( bool use_average_block_sizes, bool cleanup_, bool enable_vertical_final_) - : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs) - , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows, max_block_size_bytes), cleanup(cleanup_) - , enable_vertical_final(enable_vertical_final_) + : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs, std::make_unique(use_average_block_sizes, max_block_size_rows, max_block_size_bytes)) + , cleanup(cleanup_), enable_vertical_final(enable_vertical_final_) { if (!is_deleted_column.empty()) is_deleted_column_number = header_.getPositionByName(is_deleted_column); @@ -75,7 +74,7 @@ void ReplacingSortedAlgorithm::insertRow() to_be_emitted.push(std::move(selected_row.owned_chunk)); } else - merged_data.insertRow(*selected_row.all_columns, selected_row.row_num, selected_row.owned_chunk->getNumRows()); + merged_data->insertRow(*selected_row.all_columns, selected_row.row_num, selected_row.owned_chunk->getNumRows()); 
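// Illustrative sketch (not part of this patch): the row-selection rule that
// ReplacingSortedAlgorithm::merge() applies over the sorted, merged stream -
// within each run of equal keys only one row survives, the one with the
// largest version (later rows win ties here). Plain structs stand in for
// columns and cursors.
#include <cstdio>
#include <vector>

struct Row { int key; int version; };

static std::vector<Row> replaceByKey(const std::vector<Row> & sorted_rows)
{
    std::vector<Row> result;
    for (const Row & row : sorted_rows)
    {
        bool key_differs = result.empty() || result.back().key != row.key;
        if (key_differs)
            result.push_back(row);                       // start a new key group
        else if (row.version >= result.back().version)
            result.back() = row;                         // replace the selected row
    }
    return result;
}

int main()
{
    // Input must already be sorted by key; that is what the tree of merge
    // cursors guarantees in the real algorithm.
    std::vector<Row> rows = {{1, 1}, {1, 3}, {2, 7}, {3, 2}, {3, 2}};
    for (const Row & row : replaceByKey(rows))
        std::printf("key %d -> version %d\n", row.key, row.version);
}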
selected_row.clear(); } @@ -109,8 +108,8 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() if (key_differs) { /// If there are enough rows and the last one is calculated completely - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); /// Write the data for the previous primary key. if (!selected_row.empty()) @@ -168,8 +167,8 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() } /// If have enough rows, return block, because it prohibited to overflow requested number of rows. - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); /// We will write the data for the last primary key. if (!selected_row.empty()) @@ -193,7 +192,7 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() return emitChunk(chunk, to_be_emitted.empty()); } - return Status(merged_data.pull(), true); + return Status(merged_data->pull(), true); } void ReplacingSortedAlgorithm::saveChunkForSkippingFinalFromSelectedRow() diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index 2fbd73c9072..a3ccccf0845 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -44,8 +44,6 @@ public: Status merge() override; private: - MergedData merged_data; - ssize_t is_deleted_column_number = -1; ssize_t version_column_number = -1; bool cleanup = false; diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index df27520856e..e2c6371c44f 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -382,47 +382,11 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns( return def; } -static MutableColumns getMergedDataColumns( - const Block & header, - const SummingSortedAlgorithm::ColumnsDefinition & def) -{ - MutableColumns columns; - size_t num_columns = def.column_numbers_not_to_aggregate.size() + def.columns_to_aggregate.size(); - columns.reserve(num_columns); - - for (const auto & desc : def.columns_to_aggregate) - { - // Wrap aggregated columns in a tuple to match function signature - if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getResultType())) - { - size_t tuple_size = desc.column_numbers.size(); - MutableColumns tuple_columns(tuple_size); - for (size_t i = 0; i < tuple_size; ++i) - tuple_columns[i] = header.safeGetByPosition(desc.column_numbers[i]).column->cloneEmpty(); - - columns.emplace_back(ColumnTuple::create(std::move(tuple_columns))); - } - else - { - const auto & type = desc.nested_type ? 
desc.nested_type : desc.real_type; - columns.emplace_back(type->createColumn()); - } - } - - for (const auto & column_number : def.column_numbers_not_to_aggregate) - columns.emplace_back(header.safeGetByPosition(column_number).type->createColumn()); - - return columns; -} - static void preprocessChunk(Chunk & chunk, const SummingSortedAlgorithm::ColumnsDefinition & def) { auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - for (const auto & desc : def.columns_to_aggregate) { if (desc.nested_type) @@ -504,11 +468,44 @@ static void setRow(Row & row, const ColumnRawPtrs & raw_columns, size_t row_num, } -SummingSortedAlgorithm::SummingMergedData::SummingMergedData( - MutableColumns columns_, UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_) - : MergedData(std::move(columns_), false, max_block_size_rows_, max_block_size_bytes_) +SummingSortedAlgorithm::SummingMergedData::SummingMergedData(UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_) + : MergedData(false, max_block_size_rows_, max_block_size_bytes_) , def(def_) { +} + +void SummingSortedAlgorithm::SummingMergedData::initialize(const DB::Block & header, const IMergingAlgorithm::Inputs & inputs) +{ + MergedData::initialize(header, inputs); + + MutableColumns new_columns; + size_t num_columns = def.column_numbers_not_to_aggregate.size() + def.columns_to_aggregate.size(); + new_columns.reserve(num_columns); + + for (const auto & desc : def.columns_to_aggregate) + { + // Wrap aggregated columns in a tuple to match function signature + if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getResultType())) + { + size_t tuple_size = desc.column_numbers.size(); + MutableColumns tuple_columns(tuple_size); + for (size_t i = 0; i < tuple_size; ++i) + tuple_columns[i] = std::move(columns[desc.column_numbers[i]]); + + new_columns.emplace_back(ColumnTuple::create(std::move(tuple_columns))); + } + else + { + const auto & type = desc.nested_type ? 
desc.nested_type : desc.real_type; + new_columns.emplace_back(type->createColumn()); + } + } + + for (const auto & column_number : def.column_numbers_not_to_aggregate) + new_columns.emplace_back(std::move(columns[column_number])); + + columns = std::move(new_columns); + current_row.resize(def.column_names.size()); initAggregateDescription(); @@ -698,12 +695,15 @@ SummingSortedAlgorithm::SummingSortedAlgorithm( size_t max_block_size_bytes) : IMergingAlgorithmWithDelayedChunk(header_, num_inputs, std::move(description_)) , columns_definition(defineColumns(header_, description, column_names_to_sum, partition_key_columns)) - , merged_data(getMergedDataColumns(header_, columns_definition), max_block_size_rows, max_block_size_bytes, columns_definition) + , merged_data(max_block_size_rows, max_block_size_bytes, columns_definition) { } void SummingSortedAlgorithm::initialize(Inputs inputs) { + removeConstAndSparse(inputs); + merged_data.initialize(header, inputs); + for (auto & input : inputs) if (input.chunk) preprocessChunk(input.chunk, columns_definition); @@ -713,6 +713,7 @@ void SummingSortedAlgorithm::initialize(Inputs inputs) void SummingSortedAlgorithm::consume(Input & input, size_t source_num) { + removeConstAndSparse(input); preprocessChunk(input.chunk, columns_definition); updateCursor(input, source_num); } diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h index dbbe4e53a5f..664b171c4b9 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h @@ -65,7 +65,9 @@ public: using MergedData::insertRow; public: - SummingMergedData(MutableColumns columns_, UInt64 max_block_size_rows, UInt64 max_block_size_bytes_, ColumnsDefinition & def_); + SummingMergedData(UInt64 max_block_size_rows, UInt64 max_block_size_bytes_, ColumnsDefinition & def_); + + void initialize(const Block & header, const IMergingAlgorithm::Inputs & inputs) override; void startGroup(ColumnRawPtrs & raw_columns, size_t row); void finishGroup(); diff --git a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp index e7a431dc1d0..9f124c6ba18 100644 --- a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp @@ -16,8 +16,7 @@ VersionedCollapsingAlgorithm::VersionedCollapsingAlgorithm( size_t max_block_size_bytes_, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) - : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, MAX_ROWS_IN_MULTIVERSION_QUEUE) - , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_) + : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, MAX_ROWS_IN_MULTIVERSION_QUEUE, std::make_unique(use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_)) /// -1 for +1 in FixedSizeDequeWithGaps's internal buffer. 3 is a reasonable minimum size to collapse anything. 
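/// Illustrative sketch, not from this patch: how the initializer below derives the queue capacity.
/// For a hypothetical max_block_size_rows_ value, the clamp is
///     size_t clamped = std::min(std::max<size_t>(3, max_block_size_rows_), size_t(MAX_ROWS_IN_MULTIVERSION_QUEUE));
///     size_t max_rows_in_queue = clamped - 1;   /// the -1 leaves room for the extra slot FixedSizeDequeWithGaps keeps internally
/// so e.g. max_block_size_rows_ = 1 still yields a queue of 2, while very large block sizes are capped at
/// MAX_ROWS_IN_MULTIVERSION_QUEUE - 1.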
, max_rows_in_queue(std::min(std::max(3, max_block_size_rows_), MAX_ROWS_IN_MULTIVERSION_QUEUE) - 1) , current_keys(max_rows_in_queue) @@ -47,7 +46,7 @@ void VersionedCollapsingAlgorithm::insertGap(size_t gap_size) void VersionedCollapsingAlgorithm::insertRow(size_t skip_rows, const RowRef & row) { - merged_data.insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows()); + merged_data->insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows()); insertGap(skip_rows); @@ -104,8 +103,8 @@ IMergingAlgorithm::Status VersionedCollapsingAlgorithm::merge() --num_rows_to_insert; /// It's ok to return here, because we didn't affect queue. - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); } if (current_keys.empty()) @@ -147,13 +146,13 @@ IMergingAlgorithm::Status VersionedCollapsingAlgorithm::merge() insertRow(gap, row); current_keys.popFront(); - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); } /// Write information about last collapsed rows. insertGap(current_keys.frontGap()); - return Status(merged_data.pull(), true); + return Status(merged_data->pull(), true); } } diff --git a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h index d98529b301c..e6d20ddac75 100644 --- a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h +++ b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h @@ -29,8 +29,6 @@ public: Status merge() override; private: - MergedData merged_data; - size_t sign_column_number = 0; const size_t max_rows_in_queue; diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 6d70aa8a60d..ed4b1906635 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -624,14 +624,11 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, } /// Process parts ranges with undefined value at end mark - bool is_intersecting = part_index_start_to_range.size() > 1; + /// The last parts ranges could be non-intersect only if: (1) there is only one part range left, (2) it belongs to a non-L0 part, + /// and (3) the begin value of this range is larger than the largest end value of all previous ranges. This is too complicated + /// to check, so we just add the last part ranges to the intersecting ranges. 
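/// Hedged sketch, not code from this patch: the check that the comment above declines to implement would look roughly like
///     bool last_range_non_intersecting =
///         part_index_start_to_range.size() == 1
///         && !isLevelZeroPart(only_entry.part_index)              /// hypothetical helper
///         && only_entry.range_begin_value > max_seen_end_value;   /// requires tracking the max end value over all previous ranges
/// Maintaining max_seen_end_value for every processed range is the complexity the patch avoids by classifying the
/// remaining ranges as intersecting unconditionally.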
for (const auto & [part_range_index, mark_range] : part_index_start_to_range) - { - if (is_intersecting) - add_intersecting_range(part_range_index.part_index, mark_range); - else - add_non_intersecting_range(part_range_index.part_index, mark_range); - } + add_intersecting_range(part_range_index.part_index, mark_range); auto && non_intersecting_ranges_in_data_parts = std::move(non_intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts()); auto && intersecting_ranges_in_data_parts = std::move(intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts()); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 21303cf2af2..198236c4f49 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -410,7 +410,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_wit auto algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), storage_snapshot, prewhere_info, + pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); auto source = std::make_shared(std::move(processor)); @@ -509,7 +509,7 @@ Pipe ReadFromMergeTree::readFromPool( auto algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), storage_snapshot, prewhere_info, + pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); auto source = std::make_shared(std::move(processor)); @@ -621,7 +621,7 @@ Pipe ReadFromMergeTree::readInOrder( algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), storage_snapshot, prewhere_info, + pool, std::move(algorithm), prewhere_info, actions_settings, block_size, reader_settings); processor->addPartLevelToChunk(isQueryWithFinal()); diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 6e6edfa1208..485d5f675ab 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -388,6 +388,8 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( chassert(cluster->getShardCount() == 1); std::vector description; + description.push_back(fmt::format("query: {}", formattedAST(query_ast))); + for (const auto & pool : cluster->getShardsInfo().front().per_replica_pools) description.push_back(fmt::format("Replica: {}", pool->getHost())); diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp index b2e8e9bc89e..15f8355bdc7 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.cpp +++ b/src/Processors/Transforms/ColumnGathererTransform.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -20,11 +21,13 @@ ColumnGathererStream::ColumnGathererStream( size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_rows_, - size_t block_preferred_size_bytes_) + size_t block_preferred_size_bytes_, + bool is_result_sparse_) : sources(num_inputs) , row_sources_buf(row_sources_buf_) , block_preferred_size_rows(block_preferred_size_rows_) , block_preferred_size_bytes(block_preferred_size_bytes_) + , is_result_sparse(is_result_sparse_) { if (num_inputs == 0) throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "There are no streams to gather"); @@ -32,15 +35,29 @@ ColumnGathererStream::ColumnGathererStream( void 
ColumnGathererStream::initialize(Inputs inputs) { + Columns source_columns; + source_columns.reserve(inputs.size()); for (size_t i = 0; i < inputs.size(); ++i) { - if (inputs[i].chunk) - { - sources[i].update(inputs[i].chunk.detachColumns().at(0)); - if (!result_column) - result_column = sources[i].column->cloneEmpty(); - } + if (!inputs[i].chunk) + continue; + + if (!is_result_sparse) + convertToFullIfSparse(inputs[i].chunk); + + sources[i].update(inputs[i].chunk.detachColumns().at(0)); + source_columns.push_back(sources[i].column); } + + if (source_columns.empty()) + return; + + result_column = source_columns[0]->cloneEmpty(); + if (is_result_sparse && !result_column->isSparse()) + result_column = ColumnSparse::create(std::move(result_column)); + + if (result_column->hasDynamicStructure()) + result_column->takeDynamicStructureFromSourceColumns(source_columns); } IMergingAlgorithm::Status ColumnGathererStream::merge() @@ -52,7 +69,19 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() if (source_to_fully_copy) /// Was set on a previous iteration { Chunk res; - res.addColumn(source_to_fully_copy->column); + /// For columns with Dynamic structure we cannot just take column source_to_fully_copy because resulting column may have + /// different Dynamic structure (and have some merge statistics after calling takeDynamicStructureFromSourceColumns). + /// We should insert into data resulting column using insertRangeFrom. + if (result_column->hasDynamicStructure()) + { + auto col = result_column->cloneEmpty(); + col->insertRangeFrom(*source_to_fully_copy->column, 0, source_to_fully_copy->column->size()); + res.addColumn(std::move(col)); + } + else + { + res.addColumn(source_to_fully_copy->column); + } merged_rows += source_to_fully_copy->size; source_to_fully_copy->pos = source_to_fully_copy->size; source_to_fully_copy = nullptr; @@ -96,7 +125,16 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() Chunk res; merged_rows += source_to_fully_copy->column->size(); merged_bytes += source_to_fully_copy->column->allocatedBytes(); - res.addColumn(source_to_fully_copy->column); + if (result_column->hasDynamicStructure()) + { + auto col = result_column->cloneEmpty(); + col->insertRangeFrom(*source_to_fully_copy->column, 0, source_to_fully_copy->column->size()); + res.addColumn(std::move(col)); + } + else + { + res.addColumn(source_to_fully_copy->column); + } source_to_fully_copy->pos = source_to_fully_copy->size; source_to_fully_copy = nullptr; return Status(std::move(res)); @@ -117,7 +155,12 @@ void ColumnGathererStream::consume(Input & input, size_t source_num) { auto & source = sources[source_num]; if (input.chunk) + { + if (!is_result_sparse) + convertToFullIfSparse(input.chunk); + source.update(input.chunk.getColumns().at(0)); + } if (0 == source.size) { @@ -130,10 +173,11 @@ ColumnGathererTransform::ColumnGathererTransform( size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_rows_, - size_t block_preferred_size_bytes_) + size_t block_preferred_size_bytes_, + bool is_result_sparse_) : IMergingTransform( num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, - num_inputs, row_sources_buf_, block_preferred_size_rows_, block_preferred_size_bytes_) + num_inputs, row_sources_buf_, block_preferred_size_rows_, block_preferred_size_bytes_, is_result_sparse_) , log(getLogger("ColumnGathererStream")) { if (header.columns() != 1) diff --git a/src/Processors/Transforms/ColumnGathererTransform.h 
b/src/Processors/Transforms/ColumnGathererTransform.h index 4e56cffa46a..ec5691316ce 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.h +++ b/src/Processors/Transforms/ColumnGathererTransform.h @@ -60,7 +60,8 @@ public: size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_rows_, - size_t block_preferred_size_bytes_); + size_t block_preferred_size_bytes_, + bool is_result_sparse_); const char * getName() const override { return "ColumnGathererStream"; } void initialize(Inputs inputs) override; @@ -97,6 +98,7 @@ private: const size_t block_preferred_size_rows; const size_t block_preferred_size_bytes; + const bool is_result_sparse; Source * source_to_fully_copy = nullptr; @@ -113,7 +115,8 @@ public: size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_rows_, - size_t block_preferred_size_bytes_); + size_t block_preferred_size_bytes_, + bool is_result_sparse_); String getName() const override { return "ColumnGathererTransform"; } @@ -145,7 +148,6 @@ void ColumnGathererStream::gather(Column & column_res) next_required_source = -1; - /// We use do ... while here to ensure there will be at least one iteration of this loop. /// Because the column_res.byteSize() could be bigger than block_preferred_size_bytes already at this point. do diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 0d69b6e0a8d..ed67dd508f3 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -56,49 +56,39 @@ void SquashingChunksTransform::work() SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ISimpleTransform(header, header, true), squashing(min_block_size_rows, min_block_size_bytes) + : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes) { } -void SimpleSquashingChunksTransform::transform(Chunk & chunk) +void SimpleSquashingChunksTransform::consume(Chunk chunk) { - if (!finished) - { - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) - chunk.setColumns(block.getColumns(), block.rows()); - } - else - { - if (chunk.hasRows()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - - auto block = squashing.add({}); - chunk.setColumns(block.getColumns(), block.rows()); - } + Block current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); } -IProcessor::Status SimpleSquashingChunksTransform::prepare() +Chunk SimpleSquashingChunksTransform::generate() { - if (!finished && input.isFinished()) - { - if (output.isFinished()) - return Status::Finished; + if (squashed_chunk.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); - if (!output.canPush()) - return Status::PortFull; + Chunk result_chunk; + result_chunk.swap(squashed_chunk); + return result_chunk; +} - if (has_output) - { - output.pushData(std::move(output_data)); - has_output = false; - return Status::PortFull; - } +bool SimpleSquashingChunksTransform::canGenerate() +{ + return !squashed_chunk.empty(); +} - finished = true; - /// On the next call to transform() we will return all data buffered in `squashing` (if any) - return Status::Ready; - } - 
return ISimpleTransform::prepare(); +Chunk SimpleSquashingChunksTransform::getRemaining() +{ + Block current_block = squashing.add({}); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); + + Chunk result_chunk; + result_chunk.swap(squashed_chunk); + return result_chunk; } } diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index f82e9e46a61..8c30a6032e4 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -2,6 +2,7 @@ #include #include +#include #include namespace DB @@ -29,7 +30,7 @@ private: }; /// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. -class SimpleSquashingChunksTransform : public ISimpleTransform +class SimpleSquashingChunksTransform : public IInflatingTransform { public: explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); @@ -37,14 +38,14 @@ public: String getName() const override { return "SimpleSquashingTransform"; } protected: - void transform(Chunk &) override; - - IProcessor::Status prepare() override; + void consume(Chunk chunk) override; + bool canGenerate() override; + Chunk generate() override; + Chunk getRemaining() override; private: SquashingTransform squashing; - - /// When consumption is finished we need to release the final chunk regardless of its size. - bool finished = false; + Chunk squashed_chunk; }; + } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 5e8ecdca95e..cdcfad4442c 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -414,7 +414,8 @@ std::optional generateViewChain( out.getInputHeader(), view_id, nullptr, - std::move(runtime_stats)}); + std::move(runtime_stats), + insert_context}); if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { @@ -590,7 +591,7 @@ Chain buildPushingToViewsChain( static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) { - const auto & context = views_data.context; + const auto & context = view.context; /// We create a table with the same name as original table and the same alias columns, /// but it will contain single block (that is INSERT-ed into main table). diff --git a/src/Processors/Transforms/buildPushingToViewsChain.h b/src/Processors/Transforms/buildPushingToViewsChain.h index 53aceeda1cc..a1feed91b60 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.h +++ b/src/Processors/Transforms/buildPushingToViewsChain.h @@ -33,6 +33,9 @@ struct ViewRuntimeData /// Info which is needed for query views log. std::unique_ptr runtime_stats; + /// An overridden context bounded to this view with the correct SQL security grants. 
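/// Aside, not part of this patch: with the `context` member declared below populated, process() above reads
/// `const auto & context = view.context;` instead of the shared `views_data.context`, so each view's inner query
/// executes under the context that carries that view's SQL security grants.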
+ ContextPtr context; + void setException(std::exception_ptr e) { exception = e; diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index ae4cf034276..a96402247a2 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -77,12 +77,12 @@ private: bool exception_is_written = false; std::function exception_writer; - inline bool hasDelayed() const + bool hasDelayed() const { return out_maybe_delayed_and_compressed != out_maybe_compressed.get(); } - inline void finalize() + void finalize() { if (finalized) return; @@ -94,7 +94,7 @@ private: out->finalize(); } - inline bool isFinalized() const + bool isFinalized() const { return finalized; } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 3db935729b4..e3a820340ad 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index ef6c5f7362c..4879d1a16dc 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1288,7 +1288,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const /// Looks like there is something around default expression for this column (method `getDefault` is not implemented for the data type Object). /// But after ALTER TABLE ADD COLUMN we need to fill existing rows with something (exactly the default value). /// So we don't allow to do it for now. - if (command.data_type->hasDynamicSubcolumns()) + if (command.data_type->hasDynamicSubcolumnsDeprecated()) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Adding a new column of a type which has dynamic subcolumns to an existing table is not allowed. It has known bugs"); if (virtuals->tryGet(column_name, VirtualsKind::Persistent)) @@ -1366,8 +1366,8 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const const GetColumnsOptions options(GetColumnsOptions::All); const auto old_data_type = all_columns.getColumn(options, column_name).type; - bool new_type_has_object = command.data_type->hasDynamicSubcolumns(); - bool old_type_has_object = old_data_type->hasDynamicSubcolumns(); + bool new_type_has_object = command.data_type->hasDynamicSubcolumnsDeprecated(); + bool old_type_has_object = old_data_type->hasDynamicSubcolumnsDeprecated(); if (new_type_has_object || old_type_has_object) throw Exception( diff --git a/src/Storages/Cache/ExternalDataSourceCache.h b/src/Storages/Cache/ExternalDataSourceCache.h index a5dea2f63db..4c8c7974005 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.h +++ b/src/Storages/Cache/ExternalDataSourceCache.h @@ -70,7 +70,7 @@ public: void initOnce(ContextPtr context, const String & root_dir_, size_t limit_size_, size_t bytes_read_before_flush_); - inline bool isInitialized() const { return initialized; } + bool isInitialized() const { return initialized; } std::pair, std::unique_ptr> createReader(ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr & read_buffer, bool is_random_accessed); diff --git a/src/Storages/Cache/RemoteCacheController.h b/src/Storages/Cache/RemoteCacheController.h index 782a6b89519..22b3d64b1db 100644 --- a/src/Storages/Cache/RemoteCacheController.h +++ b/src/Storages/Cache/RemoteCacheController.h @@ -45,41 +45,41 @@ public: */ void waitMoreData(size_t start_offset_, size_t end_offset_); - inline size_t size() const { return current_offset; } + size_t size() const { return current_offset; } - inline 
const std::filesystem::path & getLocalPath() { return local_path; } - inline String getRemotePath() const { return file_metadata_ptr->remote_path; } + const std::filesystem::path & getLocalPath() { return local_path; } + String getRemotePath() const { return file_metadata_ptr->remote_path; } - inline UInt64 getLastModificationTimestamp() const { return file_metadata_ptr->last_modification_timestamp; } + UInt64 getLastModificationTimestamp() const { return file_metadata_ptr->last_modification_timestamp; } bool isModified(IRemoteFileMetadataPtr file_metadata_); - inline void markInvalid() + void markInvalid() { std::lock_guard lock(mutex); valid = false; } - inline bool isValid() + bool isValid() { std::lock_guard lock(mutex); return valid; } - inline bool isEnable() + bool isEnable() { std::lock_guard lock(mutex); return is_enable; } - inline void disable() + void disable() { std::lock_guard lock(mutex); is_enable = false; } - inline void enable() + void enable() { std::lock_guard lock(mutex); is_enable = true; } IRemoteFileMetadataPtr getFileMetadata() { return file_metadata_ptr; } - inline size_t getFileSize() const { return file_metadata_ptr->file_size; } + size_t getFileSize() const { return file_metadata_ptr->file_size; } void startBackgroundDownload(std::unique_ptr in_readbuffer_, BackgroundSchedulePool & thread_pool); diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 16b89f24243..4cf66649ad1 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -547,7 +547,19 @@ bool ColumnsDescription::hasNested(const String & column_name) const bool ColumnsDescription::hasSubcolumn(const String & column_name) const { - return subcolumns.get<0>().count(column_name); + if (subcolumns.get<0>().count(column_name)) + return true; + + /// Check for dynamic subcolumns + auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); + auto it = columns.get<1>().find(ordinary_column_name); + if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) + return true; + } + + return false; } const ColumnDescription & ColumnsDescription::get(const String & column_name) const @@ -644,6 +656,15 @@ std::optional ColumnsDescription::tryGetColumn(const GetColumns return *jt; } + /// Check for dynamic subcolumns. + auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); + it = columns.get<1>().find(ordinary_column_name); + if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) + return NameAndTypePair(ordinary_column_name, dynamic_subcolumn_name, it->type, dynamic_subcolumn_type); + } + return {}; } @@ -730,9 +751,19 @@ bool ColumnsDescription::hasAlias(const String & column_name) const bool ColumnsDescription::hasColumnOrSubcolumn(GetColumnsOptions::Kind kind, const String & column_name) const { auto it = columns.get<1>().find(column_name); - return (it != columns.get<1>().end() - && (defaultKindToGetKind(it->default_desc.kind) & kind)) - || hasSubcolumn(column_name); + if ((it != columns.get<1>().end() && (defaultKindToGetKind(it->default_desc.kind) & kind)) || hasSubcolumn(column_name)) + return true; + + /// Check for dynamic subcolumns. 
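/// Aside, not part of this patch: Nested::splitName() cuts at the first dot, so a request for e.g. "json.a.b"
/// is treated as ordinary column "json" plus subcolumn path "a.b"; the lookup below then asks the type of "json"
/// whether it can provide that subcolumn dynamically (only types that expose dynamic subcolumns qualify).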
+ auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); + it = columns.get<1>().find(ordinary_column_name); + if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = it->type->hasSubcolumn(dynamic_subcolumn_name)) + return true; + } + + return false; } bool ColumnsDescription::hasColumnOrNested(GetColumnsOptions::Kind kind, const String & column_name) const diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.h b/src/Storages/DataLakes/DeltaLakeMetadataParser.h deleted file mode 100644 index df7276b90b4..00000000000 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -template -struct DeltaLakeMetadataParser -{ -public: - DeltaLakeMetadataParser(); - - Strings getFiles(const Configuration & configuration, ContextPtr context); - -private: - struct Impl; - std::shared_ptr impl; -}; - -} diff --git a/src/Storages/DataLakes/HudiMetadataParser.cpp b/src/Storages/DataLakes/HudiMetadataParser.cpp deleted file mode 100644 index 699dfe8fda0..00000000000 --- a/src/Storages/DataLakes/HudiMetadataParser.cpp +++ /dev/null @@ -1,116 +0,0 @@ -#include -#include -#include -#include -#include -#include "config.h" -#include -#include - -#if USE_AWS_S3 -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -template -struct HudiMetadataParser::Impl -{ - /** - * Useful links: - * - https://hudi.apache.org/tech-specs/ - * - https://hudi.apache.org/docs/file_layouts/ - */ - - /** - * Hudi tables store metadata files and data files. - * Metadata files are stored in .hoodie/metadata directory. Though unlike DeltaLake and Iceberg, - * metadata is not required in order to understand which files we need to read, moreover, - * for Hudi metadata does not always exist. - * - * There can be two types of data files - * 1. base files (columnar file formats like Apache Parquet/Orc) - * 2. log files - * Currently we support reading only `base files`. - * Data file name format: - * [File Id]_[File Write Token]_[Transaction timestamp].[File Extension] - * - * To find needed parts we need to find out latest part file for every file group for every partition. - * Explanation why: - * Hudi reads in and overwrites the entire table/partition with each update. - * Hudi controls the number of file groups under a single partition according to the - * hoodie.parquet.max.file.size option. Once a single Parquet file is too large, Hudi creates a second file group. - * Each file group is identified by File Id. 
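/// Illustrative example, not from the original comment: for a hypothetical key "year=2024/abc123_0-7-24_20240101010101.parquet",
/// the stem "abc123_0-7-24_20240101010101" splits on '_' into {"abc123", "0-7-24", "20240101010101"}, giving
/// file_id = "abc123" and timestamp = 20240101010101; per (partition, file_id) the removed parser below keeps
/// only the entry with the newest timestamp.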
- */ - Strings processMetadataFiles(const Configuration & configuration) - { - auto log = getLogger("HudiMetadataParser"); - - const auto keys = MetadataReadHelper::listFiles(configuration, "", Poco::toLower(configuration.format)); - - using Partition = std::string; - using FileID = std::string; - struct FileInfo - { - String key; - UInt64 timestamp = 0; - }; - std::unordered_map> data_files; - - for (const auto & key : keys) - { - auto key_file = std::filesystem::path(key); - Strings file_parts; - const String stem = key_file.stem(); - splitInto<'_'>(file_parts, stem); - if (file_parts.size() != 3) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format for file: {}", key); - - const auto partition = key_file.parent_path().stem(); - const auto & file_id = file_parts[0]; - const auto timestamp = parse(file_parts[2]); - - auto & file_info = data_files[partition][file_id]; - if (file_info.timestamp == 0 || file_info.timestamp < timestamp) - { - file_info.key = std::move(key); - file_info.timestamp = timestamp; - } - } - - Strings result; - for (auto & [partition, partition_data] : data_files) - { - LOG_TRACE(log, "Adding {} data files from partition {}", partition, partition_data.size()); - for (auto & [file_id, file_data] : partition_data) - result.push_back(std::move(file_data.key)); - } - return result; - } -}; - - -template -HudiMetadataParser::HudiMetadataParser() : impl(std::make_unique()) -{ -} - -template -Strings HudiMetadataParser::getFiles(const Configuration & configuration, ContextPtr) -{ - return impl->processMetadataFiles(configuration); -} - -template HudiMetadataParser::HudiMetadataParser(); -template Strings HudiMetadataParser::getFiles( - const StorageS3::Configuration & configuration, ContextPtr); - -} - -#endif diff --git a/src/Storages/DataLakes/HudiMetadataParser.h b/src/Storages/DataLakes/HudiMetadataParser.h deleted file mode 100644 index 6727ba2f718..00000000000 --- a/src/Storages/DataLakes/HudiMetadataParser.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -template -struct HudiMetadataParser -{ -public: - HudiMetadataParser(); - - Strings getFiles(const Configuration & configuration, ContextPtr context); - -private: - struct Impl; - std::shared_ptr impl; -}; - -} diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h deleted file mode 100644 index 2147f2c9e6b..00000000000 --- a/src/Storages/DataLakes/IStorageDataLake.h +++ /dev/null @@ -1,136 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -template -class IStorageDataLake : public Storage -{ -public: - static constexpr auto name = Name::name; - using Configuration = typename Storage::Configuration; - - template - explicit IStorageDataLake(const Configuration & configuration_, ContextPtr context_, LoadingStrictnessLevel mode, Args && ...args) - : Storage(getConfigurationForDataRead(configuration_, context_, {}, mode), context_, std::forward(args)...) 
- , base_configuration(configuration_) - , log(getLogger(getName())) {} // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) - - template - static StoragePtr create(const Configuration & configuration_, ContextPtr context_, LoadingStrictnessLevel mode, Args && ...args) - { - return std::make_shared>(configuration_, context_, mode, std::forward(args)...); - } - - String getName() const override { return name; } - - static ColumnsDescription getTableStructureFromData( - Configuration & base_configuration, - const std::optional & format_settings, - const ContextPtr & local_context) - { - auto configuration = getConfigurationForDataRead(base_configuration, local_context); - return Storage::getTableStructureFromData(configuration, format_settings, local_context); - } - - static Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context) - { - return Storage::getConfiguration(engine_args, local_context, /* get_format_from_file */false); - } - - Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context) override - { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); - return Storage::getConfigurationCopy(); - } - - void updateConfiguration(const ContextPtr & local_context) override - { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); - } - -private: - static Configuration getConfigurationForDataRead( - const Configuration & base_configuration, const ContextPtr & local_context, const Strings & keys = {}, - LoadingStrictnessLevel mode = LoadingStrictnessLevel::CREATE) - { - auto configuration{base_configuration}; - configuration.update(local_context); - configuration.static_configuration = true; - - try - { - if (keys.empty()) - configuration.keys = getDataFiles(configuration, local_context); - else - configuration.keys = keys; - - LOG_TRACE( - getLogger("DataLake"), - "New configuration path: {}, keys: {}", - configuration.getPath(), fmt::join(configuration.keys, ", ")); - - configuration.connect(local_context); - return configuration; - } - catch (...) - { - if (mode <= LoadingStrictnessLevel::CREATE) - throw; - tryLogCurrentException(__PRETTY_FUNCTION__); - return configuration; - } - } - - static Strings getDataFiles(const Configuration & configuration, const ContextPtr & local_context) - { - return MetadataParser().getFiles(configuration, local_context); - } - - void updateConfigurationImpl(const ContextPtr & local_context) - { - const bool updated = base_configuration.update(local_context); - auto new_keys = getDataFiles(base_configuration, local_context); - - if (!updated && new_keys == Storage::getConfigurationCopy().keys) - return; - - Storage::useConfiguration(getConfigurationForDataRead(base_configuration, local_context, new_keys)); - } - - Configuration base_configuration; - std::mutex configuration_update_mutex; - LoggerPtr log; -}; - - -template -static StoragePtr createDataLakeStorage(const StorageFactory::Arguments & args) -{ - auto configuration = DataLake::getConfiguration(args.engine_args, args.getLocalContext()); - - /// Data lakes use parquet format, no need for schema inference. 
- if (configuration.format == "auto") - configuration.format = "Parquet"; - - return DataLake::create(configuration, args.getContext(), args.mode, args.table_id, args.columns, args.constraints, - args.comment, getFormatSettings(args.getContext())); -} - -} - -#endif diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp deleted file mode 100644 index 19cd97c3d4f..00000000000 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include - -#if USE_AWS_S3 && USE_AVRO - -namespace DB -{ - -StoragePtr StorageIceberg::create( - const DB::StorageIceberg::Configuration & base_configuration, - DB::ContextPtr context_, - LoadingStrictnessLevel mode, - const DB::StorageID & table_id_, - const DB::ColumnsDescription & columns_, - const DB::ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_) -{ - auto configuration{base_configuration}; - configuration.update(context_); - std::unique_ptr metadata; - NamesAndTypesList schema_from_metadata; - try - { - metadata = parseIcebergMetadata(configuration, context_); - schema_from_metadata = metadata->getTableSchema(); - configuration.keys = metadata->getDataFiles(); - } - catch (...) - { - if (mode <= LoadingStrictnessLevel::CREATE) - throw; - tryLogCurrentException(__PRETTY_FUNCTION__); - } - - return std::make_shared( - std::move(metadata), - configuration, - context_, - table_id_, - columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, - constraints_, - comment, - format_settings_); -} - -StorageIceberg::StorageIceberg( - std::unique_ptr metadata_, - const Configuration & configuration_, - ContextPtr context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_) - : StorageS3(configuration_, context_, table_id_, columns_, constraints_, comment, format_settings_) - , current_metadata(std::move(metadata_)) - , base_configuration(configuration_) -{ -} - -ColumnsDescription StorageIceberg::getTableStructureFromData( - Configuration & base_configuration, - const std::optional &, - const ContextPtr & local_context) -{ - auto configuration{base_configuration}; - configuration.update(local_context); - auto metadata = parseIcebergMetadata(configuration, local_context); - return ColumnsDescription(metadata->getTableSchema()); -} - -void StorageIceberg::updateConfigurationImpl(const ContextPtr & local_context) -{ - const bool updated = base_configuration.update(local_context); - auto new_metadata = parseIcebergMetadata(base_configuration, local_context); - - if (!current_metadata || new_metadata->getVersion() != current_metadata->getVersion()) - current_metadata = std::move(new_metadata); - else if (!updated) - return; - - auto updated_configuration{base_configuration}; - /// If metadata wasn't changed, we won't list data files again. 
- updated_configuration.keys = current_metadata->getDataFiles(); - StorageS3::useConfiguration(updated_configuration); -} - -} - -#endif diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.h b/src/Storages/DataLakes/Iceberg/StorageIceberg.h deleted file mode 100644 index 9e3885124d6..00000000000 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.h +++ /dev/null @@ -1,85 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 && USE_AVRO - -# include -# include -# include -# include -# include -# include -# include - - -namespace DB -{ - -/// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/) -/// Right now it's implemented on top of StorageS3 and right now it doesn't support -/// many Iceberg features like schema evolution, partitioning, positional and equality deletes. -/// TODO: Implement Iceberg as a separate storage using IObjectStorage -/// (to support all object storages, not only S3) and add support for missing Iceberg features. -class StorageIceberg : public StorageS3 -{ -public: - static constexpr auto name = "Iceberg"; - - using Configuration = StorageS3::Configuration; - - static StoragePtr create(const Configuration & base_configuration, - ContextPtr context_, - LoadingStrictnessLevel mode, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_); - - StorageIceberg( - std::unique_ptr metadata_, - const Configuration & configuration_, - ContextPtr context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_); - - String getName() const override { return name; } - - static ColumnsDescription getTableStructureFromData( - Configuration & base_configuration, - const std::optional &, - const ContextPtr & local_context); - - static Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context) - { - return StorageS3::getConfiguration(engine_args, local_context, /* get_format_from_file */false); - } - - Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context) override - { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); - return StorageS3::getConfigurationCopy(); - } - - void updateConfiguration(const ContextPtr & local_context) override - { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); - } - -private: - void updateConfigurationImpl(const ContextPtr & local_context); - - std::unique_ptr current_metadata; - Configuration base_configuration; - std::mutex configuration_update_mutex; -}; - -} - -#endif diff --git a/src/Storages/DataLakes/S3MetadataReader.cpp b/src/Storages/DataLakes/S3MetadataReader.cpp deleted file mode 100644 index 62a486951fe..00000000000 --- a/src/Storages/DataLakes/S3MetadataReader.cpp +++ /dev/null @@ -1,87 +0,0 @@ -#include - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int S3_ERROR; -} - -std::shared_ptr -S3DataLakeMetadataReadHelper::createReadBuffer(const String & key, ContextPtr context, const StorageS3::Configuration & base_configuration) -{ - S3Settings::RequestSettings request_settings; - request_settings.max_single_read_retries = context->getSettingsRef().s3_max_single_read_retries; - return 
std::make_shared( - base_configuration.client, - base_configuration.url.bucket, - key, - base_configuration.url.version_id, - request_settings, - context->getReadSettings()); -} - -bool S3DataLakeMetadataReadHelper::exists(const String & key, const StorageS3::Configuration & configuration) -{ - return S3::objectExists(*configuration.client, configuration.url.bucket, key); -} - -std::vector S3DataLakeMetadataReadHelper::listFiles( - const StorageS3::Configuration & base_configuration, const String & prefix, const String & suffix) -{ - const auto & table_path = base_configuration.url.key; - const auto & bucket = base_configuration.url.bucket; - const auto & client = base_configuration.client; - - std::vector res; - S3::ListObjectsV2Request request; - Aws::S3::Model::ListObjectsV2Outcome outcome; - - request.SetBucket(bucket); - request.SetPrefix(std::filesystem::path(table_path) / prefix); - - bool is_finished{false}; - while (!is_finished) - { - outcome = client->ListObjectsV2(request); - if (!outcome.IsSuccess()) - throw S3Exception( - outcome.GetError().GetErrorType(), - "Could not list objects in bucket {} with key {}, S3 exception: {}, message: {}", - quoteString(bucket), - quoteString(base_configuration.url.key), - backQuote(outcome.GetError().GetExceptionName()), - quoteString(outcome.GetError().GetMessage())); - - const auto & result_batch = outcome.GetResult().GetContents(); - for (const auto & obj : result_batch) - { - const auto & filename = obj.GetKey(); - if (filename.ends_with(suffix)) - res.push_back(filename); - } - - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - is_finished = !outcome.GetResult().GetIsTruncated(); - } - - LOG_TRACE(getLogger("S3DataLakeMetadataReadHelper"), "Listed {} files", res.size()); - - return res; -} - -} -#endif diff --git a/src/Storages/DataLakes/S3MetadataReader.h b/src/Storages/DataLakes/S3MetadataReader.h deleted file mode 100644 index c29a66b3813..00000000000 --- a/src/Storages/DataLakes/S3MetadataReader.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include - -#if USE_AWS_S3 - -#include - -namespace DB -{ - -class ReadBuffer; - -struct S3DataLakeMetadataReadHelper -{ - static std::shared_ptr createReadBuffer( - const String & key, ContextPtr context, const StorageS3::Configuration & base_configuration); - - static bool exists(const String & key, const StorageS3::Configuration & configuration); - - static std::vector listFiles(const StorageS3::Configuration & configuration, const std::string & prefix = "", const std::string & suffix = ""); -}; -} - -#endif diff --git a/src/Storages/DataLakes/StorageDeltaLake.h b/src/Storages/DataLakes/StorageDeltaLake.h deleted file mode 100644 index 8b4ba28d6f7..00000000000 --- a/src/Storages/DataLakes/StorageDeltaLake.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include -#include -#include -#include "config.h" - -#if USE_AWS_S3 -#include -#include -#endif - -namespace DB -{ - -struct StorageDeltaLakeName -{ - static constexpr auto name = "DeltaLake"; -}; - -#if USE_AWS_S3 && USE_PARQUET -using StorageDeltaLakeS3 = IStorageDataLake>; -#endif - -} diff --git a/src/Storages/DataLakes/StorageHudi.h b/src/Storages/DataLakes/StorageHudi.h deleted file mode 100644 index 84666f51405..00000000000 --- a/src/Storages/DataLakes/StorageHudi.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include -#include -#include -#include "config.h" - -#if USE_AWS_S3 -#include -#include -#endif - -namespace DB -{ - -struct StorageHudiName -{ - static constexpr auto name = "Hudi"; -}; - -#if 
USE_AWS_S3 -using StorageHudiS3 = IStorageDataLake>; -#endif - -} diff --git a/src/Storages/DataLakes/registerDataLakes.cpp b/src/Storages/DataLakes/registerDataLakes.cpp deleted file mode 100644 index 118600f7212..00000000000 --- a/src/Storages/DataLakes/registerDataLakes.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include - - -namespace DB -{ - -#define REGISTER_DATA_LAKE_STORAGE(STORAGE, NAME) \ - factory.registerStorage( \ - NAME, \ - [](const StorageFactory::Arguments & args) \ - { \ - return createDataLakeStorage(args);\ - }, \ - { \ - .supports_settings = false, \ - .supports_schema_inference = true, \ - .source_access_type = AccessType::S3, \ - }); - -#if USE_PARQUET -void registerStorageDeltaLake(StorageFactory & factory) -{ - REGISTER_DATA_LAKE_STORAGE(StorageDeltaLakeS3, StorageDeltaLakeName::name) -} -#endif - -#if USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. - -void registerStorageIceberg(StorageFactory & factory) -{ - REGISTER_DATA_LAKE_STORAGE(StorageIceberg, StorageIceberg::name) -} - -#endif - -void registerStorageHudi(StorageFactory & factory) -{ - REGISTER_DATA_LAKE_STORAGE(StorageHudiS3, StorageHudiName::name) -} - -} - -#endif diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp deleted file mode 100644 index 33bde34b4f9..00000000000 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ /dev/null @@ -1,1208 +0,0 @@ -#include "config.h" - -#if USE_HDFS - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include - -#include - -namespace fs = std::filesystem; - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ACCESS_DENIED; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; - extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_DETECT_FORMAT; -} -namespace -{ - struct HDFSFileInfoDeleter - { - /// Can have only one entry (see hdfsGetPathInfo()) - void operator()(hdfsFileInfo * info) { hdfsFreeFileInfo(info, 1); } - }; - using HDFSFileInfoPtr = std::unique_ptr; - - /* Recursive directory listing with matched paths as a result. - * Have the same method in StorageFile. 
- */ - std::vector LSWithRegexpMatching( - const String & path_for_ls, - const HDFSFSPtr & fs, - const String & for_match) - { - std::vector result; - - const size_t first_glob_pos = for_match.find_first_of("*?{"); - - if (first_glob_pos == std::string::npos) - { - const String path = fs::path(path_for_ls + for_match.substr(1)).lexically_normal(); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path.c_str())); - if (hdfs_info) // NOLINT - { - result.push_back(StorageHDFS::PathWithInfo{ - String(path), - StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}}); - } - return result; - } - - const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); - const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' - const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/' - - const size_t next_slash_after_glob_pos = suffix_with_globs.find('/', 1); - - const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); - - re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob)); - if (!matcher.ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", for_match, matcher.error()); - - HDFSFileInfo ls; - ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length); - if (ls.file_info == nullptr && errno != ENOENT) // NOLINT - { - // ignore file not found exception, keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno. - throw Exception( - ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", prefix_without_globs, String(hdfsGetLastError())); - } - - if (!ls.file_info && ls.length > 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); - for (int i = 0; i < ls.length; ++i) - { - const String full_path = fs::path(ls.file_info[i].mName).lexically_normal(); - const size_t last_slash = full_path.rfind('/'); - const String file_name = full_path.substr(last_slash); - const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; - const bool is_directory = ls.file_info[i].mKind == 'D'; - /// Condition with type of current file_info means what kind of path is it in current iteration of ls - if (!is_directory && !looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - result.push_back(StorageHDFS::PathWithInfo{ - String(full_path), - StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); - } - else if (is_directory && looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - { - std::vector result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, - suffix_with_globs.substr(next_slash_after_glob_pos)); - /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. 
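/// Aside, not part of this patch: e.g. the glob "/data/*/file.parquet" descends exactly one directory level,
/// while matching two levels requires "/data/*/*/file.parquet"; each globbed path segment accounts for one
/// recursion level, which is why the removed listing code needs no separate depth limit.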
- std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); - } - } - } - - return result; - } - - std::pair getPathFromUriAndUriWithoutPath(const String & uri) - { - auto pos = uri.find("//"); - if (pos != std::string::npos && pos + 2 < uri.length()) - { - pos = uri.find('/', pos + 2); - if (pos != std::string::npos) - return {uri.substr(pos), uri.substr(0, pos)}; - } - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage HDFS requires valid URL to be set"); - } - - std::vector getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context) - { - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - Strings paths = expandSelectionGlob(path_from_uri); - - std::vector res; - - for (const auto & path : paths) - { - auto part_of_res = LSWithRegexpMatching("/", fs, path); - res.insert(res.end(), part_of_res.begin(), part_of_res.end()); - } - return res; - } -} - -StorageHDFS::StorageHDFS( - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - const ContextPtr & context_, - const String & compression_method_, - const bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , WithContext(context_) - , uris({uri_}) - , format_name(format_name_) - , compression_method(compression_method_) - , distributed_processing(distributed_processing_) - , partition_by(partition_by_) -{ - if (format_name != "auto") - FormatFactory::instance().checkFormatName(format_name); - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); - checkHDFSURL(uri_); - - String path = uri_.substr(uri_.find('/', uri_.find("//") + 2)); - is_path_with_globs = path.find_first_of("*?{") != std::string::npos; - - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - ColumnsDescription columns; - if (format_name == "auto") - std::tie(columns, format_name) = getTableStructureAndFormatFromData(uri_, compression_method_, context_); - else - columns = getTableStructureFromData(format_name, uri_, compression_method, context_); - - storage_metadata.setColumns(columns); - } - else - { - if (format_name == "auto") - format_name = getTableStructureAndFormatFromData(uri_, compression_method_, context_).second; - - /// We don't allow special columns in HDFS storage. 
- if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine HDFS doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::vector & paths_with_info_, - const String & uri_without_path_, - std::optional format_, - const String & compression_method_, - const ContextPtr & context_) - : WithContext(context_) - , paths_with_info(paths_with_info_) - , uri_without_path(uri_without_path_) - , format(std::move(format_)) - , compression_method(compression_method_) - { - } - - Data next() override - { - bool is_first = current_index == 0; - /// For default mode check cached columns for all paths on first iteration. - if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(paths_with_info)) - return {nullptr, cached_columns, format}; - } - - StorageHDFS::PathWithInfo path_with_info; - - while (true) - { - if (current_index == paths_with_info.size()) - { - if (is_first) - { - if (format) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because all files are empty. " - "You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because all files are empty. 
You can specify table structure manually"); - } - return {nullptr, std::nullopt, format}; - } - - path_with_info = paths_with_info[current_index++]; - if (getContext()->getSettingsRef().hdfs_skip_empty_files && path_with_info.info && path_with_info.info->size == 0) - continue; - - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - std::vector paths = {path_with_info}; - if (auto cached_columns = tryGetColumnsFromCache(paths)) - return {nullptr, cached_columns, format}; - } - - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - if (!getContext()->getSettingsRef().hdfs_skip_empty_files || !impl->eof()) - { - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return {wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)), std::nullopt, format}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addColumns(key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - Strings sources; - sources.reserve(paths_with_info.size()); - std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const StorageHDFS::PathWithInfo & path_with_info){ return uri_without_path + path_with_info.path; }); - auto cache_keys = getKeysForSchemaCache(sources, *format, {}, getContext()); - StorageHDFS::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override - { - if (current_index != 0) - return paths_with_info[current_index - 1].path; - - return ""; - } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - chassert(current_index > 0 && current_index <= paths_with_info.size()); - auto path_with_info = paths_with_info[current_index - 1]; - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return wrapReadBufferWithCompressionMethod(std::move(impl), compression, 
static_cast(zstd_window_log_max)); - } - - private: - std::optional tryGetColumnsFromCache(const std::vector & paths_with_info_) - { - auto context = getContext(); - - if (!context->getSettingsRef().schema_inference_use_cache_for_hdfs) - return std::nullopt; - - auto & schema_cache = StorageHDFS::getSchemaCache(context); - for (const auto & path_with_info : paths_with_info_) - { - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - - auto builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_with_info.path.c_str())); - if (hdfs_info) - return hdfs_info->mLastMod; - - return std::nullopt; - }; - - String url = uri_without_path + path_with_info.path; - if (format) - { - auto cache_key = getKeyForSchemaCache(url, *format, {}, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry for some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(url, format_name, {}, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. - format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - const std::vector & paths_with_info; - const String & uri_without_path; - std::optional format; - const String & compression_method; - size_t current_index = 0; - }; -} - -std::pair StorageHDFS::getTableStructureAndFormatFromDataImpl( - std::optional format, - const String & uri, - const String & compression_method, - const ContextPtr & ctx) -{ - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - auto paths_with_info = getPathsList(path_from_uri, uri, ctx); - - if (paths_with_info.empty() && (!format || !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(*format))) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files in HDFS with provided path." - " You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The data format cannot be detected by the contents of the files, because there are no files in HDFS with provided path." 
- " You can specify the format manually"); - } - - ReadBufferIterator read_buffer_iterator(paths_with_info, uri_without_path, format, compression_method, ctx); - if (format) - return {readSchemaFromFormat(*format, std::nullopt, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, ctx); -} - -std::pair StorageHDFS::getTableStructureAndFormatFromData(const String & uri, const String & compression_method, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, uri, compression_method, ctx); -} - -ColumnsDescription StorageHDFS::getTableStructureFromData(const String & format, const String & uri, const String & compression_method, const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(format, uri, compression_method, ctx).first; -} - -class HDFSSource::DisclosedGlobIterator::Impl -{ -public: - Impl(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - { - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - uris = getPathsList(path_from_uri, uri_without_path, context); - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & path_with_info : uris) - paths.push_back(path_with_info.path); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, context); - } - auto file_progress_callback = context->getFileProgressCallback(); - - for (auto & elem : uris) - { - elem.path = uri_without_path + elem.path; - if (file_progress_callback && elem.info) - file_progress_callback(FileProgress(0, elem.info->size)); - } - uris_iter = uris.begin(); - } - - StorageHDFS::PathWithInfo next() - { - std::lock_guard lock(mutex); - if (uris_iter != uris.end()) - { - auto answer = *uris_iter; - ++uris_iter; - return answer; - } - return {}; - } -private: - std::mutex mutex; - std::vector uris; - std::vector::iterator uris_iter; -}; - -class HDFSSource::URISIterator::Impl : WithContext -{ -public: - explicit Impl(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context_) - : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback()) - { - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & uri : uris) - paths.push_back(getPathFromUriAndUriWithoutPath(uri).first); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, getContext()); - } - - if (!uris.empty()) - { - auto path_and_uri = getPathFromUriAndUriWithoutPath(uris[0]); - builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef()); - fs = createHDFSFS(builder.get()); - } - } - - StorageHDFS::PathWithInfo next() - { - String uri; - HDFSFileInfoPtr hdfs_info; - do - { - size_t current_index = index.fetch_add(1); - if (current_index >= uris.size()) - return {"", {}}; - - uri = uris[current_index]; - auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); - hdfs_info.reset(hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str())); - } - /// Skip non-existed files. 
- while (!hdfs_info && String(hdfsGetLastError()).find("FileNotFoundException") != std::string::npos); - - std::optional info; - if (hdfs_info) - { - info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - if (file_progress_callback) - file_progress_callback(FileProgress(0, hdfs_info->mSize)); - } - - return {uri, info}; - } - -private: - std::atomic_size_t index = 0; - Strings uris; - HDFSBuilderWrapper builder; - HDFSFSPtr fs; - std::function file_progress_callback; -}; - -HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uri, predicate, virtual_columns, context)) {} - -StorageHDFS::PathWithInfo HDFSSource::DisclosedGlobIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::URISIterator::URISIterator(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uris_, predicate, virtual_columns, context)) -{ -} - -StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::HDFSSource( - const ReadFromFormatInfo & info, - StorageHDFSPtr storage_, - const ContextPtr & context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_) - : ISource(info.source_header, false) - , WithContext(context_) - , storage(std::move(storage_)) - , block_for_format(info.format_header) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , max_block_size(max_block_size_) - , file_iterator(file_iterator_) - , columns_description(info.columns_description) - , need_only_count(need_only_count_) -{ - initialize(); -} - -HDFSSource::~HDFSSource() = default; - -bool HDFSSource::initialize() -{ - bool skip_empty_files = getContext()->getSettingsRef().hdfs_skip_empty_files; - StorageHDFS::PathWithInfo path_with_info; - while (true) - { - path_with_info = (*file_iterator)(); - if (path_with_info.path.empty()) - return false; - - if (path_with_info.info && skip_empty_files && path_with_info.info->size == 0) - continue; - - current_path = path_with_info.path; - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path); - - std::optional file_size; - if (!path_with_info.info) - { - auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_from_uri.c_str())); - if (hdfs_info) - path_with_info.info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - } - - if (path_with_info.info) - file_size = path_with_info.info->size; - - auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); - auto impl = std::make_unique( - uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size); - if (!skip_empty_files || !impl->eof()) - { - impl->setProgressCallback(getContext()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); - break; - } - } - - current_path = path_with_info.path; - current_file_size = path_with_info.info ? 
std::optional(path_with_info.info->size) : std::nullopt; - - QueryPipelineBuilder builder; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(path_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use a special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - auto source = std::make_shared(block_for_format, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, std::nullopt, max_parsing_threads); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - if (columns_description.hasDefaults()) - { - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, columns_description, *input_format, getContext()); - }); - } - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from the chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - return true; -} - -String HDFSSource::getName() const -{ - return "HDFSSource"; -} - -Chunk HDFSSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (input_format) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, current_path, current_file_size); - return chunk; - } - - if (input_format && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(current_path, total_rows_in_file); - - total_rows_in_file = 0; - - reader.reset(); - pipeline.reset(); - input_format.reset(); - read_buf.reset(); - - if (!initialize()) - break; - } - return {}; -} - -void HDFSSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - auto cache_key = getKeyForSchemaCache(path, storage->format_name, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional HDFSSource::tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info) -{ - auto cache_key = getKeyForSchemaCache(path_with_info.path, storage->format_name, std::nullopt, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - return std::nullopt; - }; - - return StorageHDFS::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -class HDFSSink : public SinkToStorage -{ -public: - HDFSSink(const String & uri, - const String & format, - const Block & sample_block, - const ContextPtr & context, - const CompressionMethod compression_method) - : SinkToStorage(sample_block) - { - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - uri, context->getGlobalContext()->getConfigRef(), context->getSettingsRef().hdfs_replication, context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); - } - - String getName() const override { return "HDFSSink"; } - - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->sync(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - std::unique_ptr write_buf; - OutputFormatPtr writer; - std::mutex cancel_mutex; - bool cancelled = false; -}; - -namespace -{ - std::optional checkAndGetNewFileOnInsertIfNeeded(const ContextPtr & context, const String & uri, size_t sequence_number) - { - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - if (context->getSettingsRef().hdfs_truncate_on_insert || hdfsExists(fs.get(), path_from_uri.c_str())) - return std::nullopt; - - if (context->getSettingsRef().hdfs_create_new_file_on_insert) - { - auto pos = uri.find_first_of('.', uri.find_last_of('/')); - String new_uri; - do - { - new_uri = uri.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : uri.substr(pos)); - ++sequence_number; - } - while (!hdfsExists(fs.get(), new_uri.c_str())); - - return new_uri; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "File with path {} already exists. If you want to overwrite it, enable setting hdfs_truncate_on_insert, " - "if you want to create new file on each insert, enable setting hdfs_create_new_file_on_insert", - path_from_uri); - } -} - -class PartitionedHDFSSink : public PartitionedSink -{ -public: - PartitionedHDFSSink( - const ASTPtr & partition_by, - const String & uri_, - const String & format_, - const Block & sample_block_, - ContextPtr context_, - const CompressionMethod compression_method_) - : PartitionedSink(partition_by, context_, sample_block_) - , uri(uri_) - , format(format_) - , sample_block(sample_block_) - , context(context_) - , compression_method(compression_method_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto path = PartitionedSink::replaceWildcards(uri, partition_id); - PartitionedSink::validatePartitionKey(path, true); - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(context, path, 1)) - path = *new_path; - return std::make_shared(path, format, sample_block, context, compression_method); - } - -private: - const String uri; - const String format; - const Block sample_block; - ContextPtr context; - const CompressionMethod compression_method; -}; - - -bool StorageHDFS::supportsSubsetOfColumns(const ContextPtr & context_) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context_); -} - -class ReadFromHDFS : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromHDFS"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromHDFS( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - ReadFromFormatInfo info_, - bool need_only_count_, - std::shared_ptr storage_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter( - DataStream{.header = std::move(sample_block)}, - column_names_, - query_info_, - storage_snapshot_, - context_) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , storage(std::move(storage_)) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - ReadFromFormatInfo 
info; - const bool need_only_count; - std::shared_ptr storage; - - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromHDFS::applyFilters(ActionDAGNodes added_filter_nodes) -{ - SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); - - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageHDFS::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context_, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_)); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && context_->getSettingsRef().optimize_count_from_files; - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - context_, - read_from_format_info.source_header, - std::move(read_from_format_info), - need_only_count, - std::move(this_ptr), - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared( - [callback = context->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo { - return StorageHDFS::PathWithInfo{callback(), std::nullopt}; - }); - } - else if (storage->is_path_with_globs) - { - /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->getVirtualsList(), context); - iterator_wrapper = std::make_shared([glob_iterator]() - { - return glob_iterator->next(); - }); - } - else - { - auto uris_iterator = std::make_shared(storage->uris, predicate, storage->getVirtualsList(), context); - iterator_wrapper = std::make_shared([uris_iterator]() - { - return uris_iterator->next(); - }); - } -} - -void ReadFromHDFS::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - Pipes pipes; - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - storage, - context, - max_block_size, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, bool /*async_insert*/) -{ - String current_uri = uris.front(); - - bool has_wildcards = current_uri.find(PartitionedSink::PARTITION_ID_WILDCARD) != String::npos; - const auto * insert_query = dynamic_cast(query.get()); - auto partition_by_ast = insert_query ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && has_wildcards; - - if (is_partitioned_implementation) - { - String path = current_uri.substr(current_uri.find('/', current_uri.find("//") + 2)); - if (PartitionedSink::replaceWildcards(path, "").find_first_of("*?{") != std::string::npos) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "URI '{}' contains globs, so the table is in readonly mode", uris.back()); - - return std::make_shared( - partition_by_ast, - current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } - else - { - if (is_path_with_globs) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "URI '{}' contains globs, so the table is in readonly mode", uris.back()); - - if (auto new_uri = checkAndGetNewFileOnInsertIfNeeded(context_, uris.front(), uris.size())) - { - uris.push_back(*new_uri); - current_uri = *new_uri; - } - - return std::make_shared(current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } -} - -void StorageHDFS::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - const size_t begin_of_path = uris[0].find('/', uris[0].find("//") + 2); - const String url = uris[0].substr(0, begin_of_path); - - HDFSBuilderWrapper builder = createHDFSBuilder(url + "/", local_context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - - for (const auto & uri : uris) - { - const String path = uri.substr(begin_of_path); - int ret = hdfsDelete(fs.get(), path.data(), 0); - if (ret) - throw Exception(ErrorCodes::ACCESS_DENIED, "Unable to truncate hdfs table: {}", std::string(hdfsGetLastError())); - } -} - - -void registerStorageHDFS(StorageFactory & factory) -{ - factory.registerStorage("HDFS", [](const StorageFactory::Arguments & args) - { - ASTs & engine_args = args.engine_args; - - if (engine_args.empty() || engine_args.size() > 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage HDFS requires 1, 2 or 3 arguments: " - "url, name of used format (taken from file extension by default) and optional compression method."); - - engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); - - String url = checkAndGetLiteralArgument(engine_args[0], "url"); - - String format_name = "auto"; - if (engine_args.size() > 1) - { - engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext()); - format_name = checkAndGetLiteralArgument(engine_args[1], "format_name"); - } - - if (format_name == "auto") - format_name = FormatFactory::instance().tryGetFormatFromFileName(url).value_or("auto"); - - String compression_method; - if (engine_args.size() == 3) - { - engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.getLocalContext()); - compression_method = checkAndGetLiteralArgument(engine_args[2], "compression_method"); - } else compression_method = "auto"; - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - url, args.table_id, format_name, args.columns, args.constraints, args.comment, args.getContext(), compression_method, false, partition_by); - }, - { - .supports_sort_order = true, // for partition by - 
.supports_schema_inference = true, - .source_access_type = AccessType::HDFS, - }); -} - -SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_hdfs", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - -} - -#endif diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h deleted file mode 100644 index b8faa27d678..00000000000 --- a/src/Storages/HDFS/StorageHDFS.h +++ /dev/null @@ -1,188 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -class IInputFormat; - -/** - * This class represents table engine for external hdfs files. - * Read method is supported for now. - */ -class StorageHDFS final : public IStorage, WithContext -{ -public: - struct PathInfo - { - time_t last_mod_time; - size_t size; - }; - - struct PathWithInfo - { - PathWithInfo() = default; - PathWithInfo(const String & path_, const std::optional & info_) : path(path_), info(info_) {} - String path; - std::optional info; - }; - - StorageHDFS( - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - const ContextPtr & context_, - const String & compression_method_ = "", - bool distributed_processing_ = false, - ASTPtr partition_by = nullptr); - - String getName() const override { return "HDFS"; } - - void read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - size_t num_streams) override; - - SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, bool async_insert) override; - - void truncate( - const ASTPtr & query, - const StorageMetadataPtr & metadata_snapshot, - ContextPtr local_context, - TableExclusiveLockHolder &) override; - - bool supportsPartitionBy() const override { return true; } - - /// Check if the format is column-oriented. - /// Is is useful because column oriented formats could effectively skip unknown columns - /// So we can create a header of only required columns in read method and ask - /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
- bool supportsSubsetOfColumns(const ContextPtr & context_) const; - - bool supportsSubcolumns() const override { return true; } - - static ColumnsDescription getTableStructureFromData( - const String & format, - const String & uri, - const String & compression_method, - const ContextPtr & ctx); - - static std::pair getTableStructureAndFormatFromData( - const String & uri, - const String & compression_method, - const ContextPtr & ctx); - - static SchemaCache & getSchemaCache(const ContextPtr & ctx); - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - -protected: - friend class HDFSSource; - friend class ReadFromHDFS; - -private: - static std::pair getTableStructureAndFormatFromDataImpl( - std::optional format, - const String & uri, - const String & compression_method, - const ContextPtr & ctx); - - std::vector uris; - String format_name; - String compression_method; - const bool distributed_processing; - ASTPtr partition_by; - bool is_path_with_globs; - - LoggerPtr log = getLogger("StorageHDFS"); -}; - -class PullingPipelineExecutor; - -class HDFSSource : public ISource, WithContext -{ -public: - class DisclosedGlobIterator - { - public: - DisclosedGlobIterator(const String & uri_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context); - StorageHDFS::PathWithInfo next(); - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - class URISIterator - { - public: - URISIterator(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context); - StorageHDFS::PathWithInfo next(); - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - using IteratorWrapper = std::function; - using StorageHDFSPtr = std::shared_ptr; - - HDFSSource( - const ReadFromFormatInfo & info, - StorageHDFSPtr storage_, - const ContextPtr & context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_); - - ~HDFSSource() override; - - String getName() const override; - - Chunk generate() override; - -private: - void addNumRowsToCache(const String & path, size_t num_rows); - std::optional tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info); - - StorageHDFSPtr storage; - Block block_for_format; - NamesAndTypesList requested_columns; - NamesAndTypesList requested_virtual_columns; - UInt64 max_block_size; - std::shared_ptr file_iterator; - ColumnsDescription columns_description; - bool need_only_count; - size_t total_rows_in_file = 0; - - std::unique_ptr read_buf; - std::shared_ptr input_format; - std::unique_ptr pipeline; - std::unique_ptr reader; - String current_path; - std::optional current_file_size; - - /// Recreate ReadBuffer and PullingPipelineExecutor for each file. 
- bool initialize(); -}; -} - -#endif diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp deleted file mode 100644 index bde8b84e349..00000000000 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include "config.h" -#include "Interpreters/Context_fwd.h" - -#if USE_HDFS - -#include - -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -StorageHDFSCluster::StorageHDFSCluster( - ContextPtr context_, - const String & cluster_name_, - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & compression_method) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageHDFSCluster (" + table_id_.table_name + ")")) - , uri(uri_) - , format_name(format_name_) -{ - checkHDFSURL(uri_); - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); - - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - ColumnsDescription columns; - if (format_name == "auto") - std::tie(columns, format_name) = StorageHDFS::getTableStructureAndFormatFromData(uri_, compression_method, context_); - else - columns = StorageHDFS::getTableStructureFromData(format_name, uri_, compression_method, context_); - storage_metadata.setColumns(columns); - } - else - { - if (format_name == "auto") - format_name = StorageHDFS::getTableStructureAndFormatFromData(uri_, compression_method, context_).second; - - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -void StorageHDFSCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) -{ - ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); - if (!expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function hdfsCluster, got '{}'", queryToString(query)); - - TableFunctionHDFSCluster::updateStructureAndFormatArgumentsIfNeeded( - expression_list->children, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), format_name, context); -} - - -RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const -{ - auto iterator = std::make_shared(uri, predicate, getVirtualsList(), context); - auto callback = std::make_shared>([iter = std::move(iterator)]() mutable -> String { return iter->next().path; }); - return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; -} - -} - -#endif diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h deleted file mode 100644 index 0b5c6242aa9..00000000000 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include -#include - -#include -#include -#include -#include - -namespace DB -{ - -class Context; - -class StorageHDFSCluster : public IStorageCluster -{ -public: - StorageHDFSCluster( - ContextPtr context_, - const String & cluster_name_, - const String 
& uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & compression_method); - - std::string getName() const override { return "HDFSCluster"; } - - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - - bool supportsSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - -private: - void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; - - String uri; - String format_name; -}; - - -} - -#endif diff --git a/src/Storages/Hive/HiveCommon.h b/src/Storages/Hive/HiveCommon.h index 0f9d3364ffd..81c167165d3 100644 --- a/src/Storages/Hive/HiveCommon.h +++ b/src/Storages/Hive/HiveCommon.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index 536214e159f..a9468ce7d3d 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace orc { @@ -65,8 +65,8 @@ public: {ORC_INPUT_FORMAT, FileFormat::ORC}, }; - static inline bool isFormatClass(const String & format_class) { return VALID_HDFS_FORMATS.contains(format_class); } - static inline FileFormat toFileFormat(const String & format_class) + static bool isFormatClass(const String & format_class) { return VALID_HDFS_FORMATS.contains(format_class); } + static FileFormat toFileFormat(const String & format_class) { if (isFormatClass(format_class)) { diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index b80bf8d7f46..28d8128e052 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -38,8 +38,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 0fc1e3ff8d9..8a457dd6e01 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 920155bf689..adada5c15ba 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -27,11 +27,14 @@ namespace ErrorCodes extern const int CANNOT_RESTORE_TABLE; } -IStorage::IStorage(StorageID storage_id_) +IStorage::IStorage(StorageID storage_id_, std::unique_ptr metadata_) : storage_id(std::move(storage_id_)) - , metadata(std::make_unique()) , virtuals(std::make_unique()) { + if (metadata_) + metadata.set(std::move(metadata_)); + else + metadata.set(std::make_unique()); } bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 63b59b1049c..0151db71340 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -98,7 +98,7 @@ class IStorage : public std::enable_shared_from_this, public TypePromo public: IStorage() = delete; /// Storage metadata can be set separately in setInMemoryMetadata method - explicit IStorage(StorageID storage_id_); + explicit IStorage(StorageID storage_id_, std::unique_ptr metadata_ = nullptr); IStorage(const IStorage &) = delete; IStorage & 
operator=(const IStorage &) = delete; @@ -171,8 +171,10 @@ public: /// This method can return true for readonly engines that return the same rows for reading (such as SystemNumbers) virtual bool supportsTransactions() const { return false; } + /// Returns true if the storage supports storing of data type Object. + virtual bool supportsDynamicSubcolumnsDeprecated() const { return false; } + /// Returns true if the storage supports storing of dynamic subcolumns. - /// For now it makes sense only for data type Object. virtual bool supportsDynamicSubcolumns() const { return false; } /// Requires squashing small blocks to large for optimal storage. @@ -258,6 +260,9 @@ public: /// Return true if storage can execute lightweight delete mutations. virtual bool supportsLightweightDelete() const { return false; } + /// Return true if storage has any projection. + virtual bool hasProjection() const { return false; } + /// Return true if storage can execute 'DELETE FROM' mutations. This is different from lightweight delete /// because those are internally translated into 'ALTER UDPATE' mutations. virtual bool supportsDelete() const { return false; } diff --git a/src/Storages/Kafka/KafkaConsumer.h b/src/Storages/Kafka/KafkaConsumer.h index f160d1c0855..a3bc97779b3 100644 --- a/src/Storages/Kafka/KafkaConsumer.h +++ b/src/Storages/Kafka/KafkaConsumer.h @@ -82,17 +82,17 @@ public: auto pollTimeout() const { return poll_timeout; } - inline bool hasMorePolledMessages() const + bool hasMorePolledMessages() const { return (stalled_status == NOT_STALLED) && (current != messages.end()); } - inline bool polledDataUnusable() const + bool polledDataUnusable() const { return (stalled_status != NOT_STALLED) && (stalled_status != NO_MESSAGES_RETURNED); } - inline bool isStalled() const { return stalled_status != NOT_STALLED; } + bool isStalled() const { return stalled_status != NOT_STALLED; } void storeLastReadMessageOffset(); void resetToLastCommitted(const char * msg); diff --git a/src/Storages/MergeTree/BackgroundProcessList.h b/src/Storages/MergeTree/BackgroundProcessList.h index c9a4887cca3..bf29aaf32d0 100644 --- a/src/Storages/MergeTree/BackgroundProcessList.h +++ b/src/Storages/MergeTree/BackgroundProcessList.h @@ -87,7 +87,7 @@ public: virtual void onEntryCreate(const Entry & /* entry */) {} virtual void onEntryDestroy(const Entry & /* entry */) {} - virtual inline ~BackgroundProcessList() = default; + virtual ~BackgroundProcessList() = default; }; } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index e0437f4f715..3e785fd2dd2 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -2434,6 +2434,38 @@ void IMergeTreeDataPart::setBrokenReason(const String & message, int code) const exception_code = code; } +ColumnPtr IMergeTreeDataPart::getColumnSample(const NameAndTypePair & column) const +{ + const size_t total_mark = getMarksCount(); + /// If column doesn't have dynamic subcolumns or part has no data, just create column using it's type. + if (!column.type->hasDynamicSubcolumns() || !total_mark) + return column.type->createColumn(); + + /// Otherwise, read sample column with 0 rows from the part, so it will load dynamic structure. 
+ NamesAndTypesList cols; + cols.emplace_back(column); + + StorageMetadataPtr metadata_ptr = storage.getInMemoryMetadataPtr(); + StorageSnapshotPtr storage_snapshot_ptr = std::make_shared(storage, metadata_ptr); + + MergeTreeReaderPtr reader = getReader( + cols, + storage_snapshot_ptr, + MarkRanges{MarkRange(0, 1)}, + /*virtual_fields=*/ {}, + /*uncompressed_cache=*/{}, + storage.getContext()->getMarkCache().get(), + std::make_shared(), + MergeTreeReaderSettings{}, + ValueSizeMap{}, + ReadBufferFromFileBase::ProfileCallback{}); + + Columns result; + result.resize(1); + reader->readRows(0, 1, false, 0, result); + return result[0]; +} + bool isCompactPart(const MergeTreeDataPartPtr & data_part) { return (data_part && data_part->getType() == MergeTreeDataPartType::Compact); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 9ee01c0efc4..bd3814bf415 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -43,7 +43,6 @@ class IReservation; using ReservationPtr = std::unique_ptr; class IMergeTreeReader; -class IMergeTreeDataPartWriter; class MarkCache; class UncompressedCache; class MergeTreeTransaction; @@ -74,7 +73,6 @@ public: using VirtualFields = std::unordered_map; using MergeTreeReaderPtr = std::unique_ptr; - using MergeTreeWriterPtr = std::unique_ptr; using ColumnSizeByName = std::unordered_map; using NameToNumber = std::unordered_map; @@ -106,15 +104,6 @@ public: const ValueSizeMap & avg_value_size_hints_, const ReadBufferFromFileBase::ProfileCallback & profile_callback_) const = 0; - virtual MergeTreeWriterPtr getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) = 0; - virtual bool isStoredOnDisk() const = 0; virtual bool isStoredOnRemoteDisk() const = 0; @@ -166,8 +155,14 @@ public: NameAndTypePair getColumn(const String & name) const; std::optional tryGetColumn(const String & column_name) const; + /// Get sample column from part. For ordinary columns it just creates column using it's type. + /// For columns with dynamic structure it reads sample column with 0 rows from the part. + ColumnPtr getColumnSample(const NameAndTypePair & column) const; + const SerializationInfoByName & getSerializationInfos() const { return serialization_infos; } + const SerializationByName & getSerializations() const { return serializations; } + SerializationPtr getSerialization(const String & column_name) const; SerializationPtr tryGetSerialization(const String & column_name) const; @@ -197,6 +192,7 @@ public: /// take place, you must take original name of column for this part from /// storage and pass it to this method. std::optional getColumnPosition(const String & column_name) const; + const NameToNumber & getColumnPositions() const { return column_name_to_position; } /// Returns the name of a column with minimum compressed size (as returned by getColumnSize()). /// If no checksums are present returns the name of the first physically existing column. 
@@ -442,6 +438,8 @@ public: bool hasProjection(const String & projection_name) const { return projection_parts.contains(projection_name); } + bool hasProjection() const { return !projection_parts.empty(); } + bool hasBrokenProjection(const String & projection_name) const; /// Return true, if all projections were loaded successfully and none was marked as broken. @@ -464,23 +462,23 @@ public: /// File with compression codec name which was used to compress part columns /// by default. Some columns may have their own compression codecs, but /// default will be stored in this file. - static inline constexpr auto DEFAULT_COMPRESSION_CODEC_FILE_NAME = "default_compression_codec.txt"; + static constexpr auto DEFAULT_COMPRESSION_CODEC_FILE_NAME = "default_compression_codec.txt"; /// "delete-on-destroy.txt" is deprecated. It is no longer being created, only is removed. - static inline constexpr auto DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED = "delete-on-destroy.txt"; + static constexpr auto DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED = "delete-on-destroy.txt"; - static inline constexpr auto UUID_FILE_NAME = "uuid.txt"; + static constexpr auto UUID_FILE_NAME = "uuid.txt"; /// File that contains information about kinds of serialization of columns /// and information that helps to choose kind of serialization later during merging /// (number of rows, number of rows with default values, etc). - static inline constexpr auto SERIALIZATION_FILE_NAME = "serialization.json"; + static constexpr auto SERIALIZATION_FILE_NAME = "serialization.json"; /// Version used for transactions. - static inline constexpr auto TXN_VERSION_METADATA_FILE_NAME = "txn_version.txt"; + static constexpr auto TXN_VERSION_METADATA_FILE_NAME = "txn_version.txt"; - static inline constexpr auto METADATA_VERSION_FILE_NAME = "metadata_version.txt"; + static constexpr auto METADATA_VERSION_FILE_NAME = "metadata_version.txt"; /// One of part files which is used to check how many references (I'd like /// to say hardlinks, but it will confuse even more) we have for the part @@ -492,7 +490,7 @@ public: /// it was mutation without any change for source part. In this case we /// really don't need to remove data from remote FS and need only decrement /// reference counter locally. - static inline constexpr auto FILE_FOR_REFERENCES_CHECK = "checksums.txt"; + static constexpr auto FILE_FOR_REFERENCES_CHECK = "checksums.txt"; /// Checks that all TTLs (table min/max, column ttls, so on) for part /// calculated. 
Part without calculated TTL may exist if TTL was added after diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 2488c63e309..891ba1b9660 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -3,6 +3,13 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int NO_SUCH_COLUMN_IN_TABLE; +} + + Block getBlockAndPermute(const Block & block, const Names & names, const IColumn::Permutation * permutation) { Block result; @@ -38,18 +45,27 @@ Block permuteBlockIfNeeded(const Block & block, const IColumn::Permutation * per } IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( - const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr & virtual_columns_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : data_part(data_part_) - , storage(data_part_->storage) + : data_part_name(data_part_name_) + , serializations(serializations_) + , index_granularity_info(index_granularity_info_) + , storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) + , virtual_columns(virtual_columns_) , columns_list(columns_list_) , settings(settings_) - , index_granularity(index_granularity_) , with_final_mark(settings.can_use_adaptive_granularity) + , data_part_storage(data_part_storage_) + , index_granularity(index_granularity_) { } @@ -60,6 +76,102 @@ Columns IMergeTreeDataPartWriter::releaseIndexColumns() std::make_move_iterator(index_columns.end())); } +SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const +{ + auto it = serializations.find(column_name); + if (it == serializations.end()) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, + "There is no column or subcolumn {} in part {}", column_name, data_part_name); + + return it->second; +} + +ASTPtr IMergeTreeDataPartWriter::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const +{ + auto get_codec_or_default = [&](const auto & column_desc) + { + return column_desc.codec ? 
column_desc.codec : default_codec->getFullCodecDesc(); + }; + + const auto & columns = metadata_snapshot->getColumns(); + if (const auto * column_desc = columns.tryGet(column_name)) + return get_codec_or_default(*column_desc); + + if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name)) + return get_codec_or_default(*virtual_desc); + + return default_codec->getFullCodecDesc(); +} + + IMergeTreeDataPartWriter::~IMergeTreeDataPartWriter() = default; + +MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, + const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr & virtual_columns, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity); + +MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr & virtual_columns, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity); + + +MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( + MergeTreeDataPartType part_type, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, + const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr & virtual_columns, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity) +{ + if (part_type == MergeTreeDataPartType::Compact) + return createMergeTreeDataPartCompactWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, column_positions, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); + else if (part_type == MergeTreeDataPartType::Wide) + return createMergeTreeDataPartWideWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, virtual_columns, 
indices_to_recalc, stats_to_recalc_, + marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown part type: {}", part_type.toString()); +} + } diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 3f359904ddd..f04beb37ebb 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -1,12 +1,13 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -22,9 +23,14 @@ class IMergeTreeDataPartWriter : private boost::noncopyable { public: IMergeTreeDataPartWriter( - const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr & virtual_columns_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_ = {}); @@ -32,7 +38,7 @@ public: virtual void write(const Block & block, const IColumn::Permutation * permutation) = 0; - virtual void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) = 0; + virtual void fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) = 0; virtual void finish(bool sync) = 0; @@ -40,16 +46,48 @@ public: const MergeTreeIndexGranularity & getIndexGranularity() const { return index_granularity; } protected: + SerializationPtr getSerialization(const String & column_name) const; - const MergeTreeMutableDataPartPtr data_part; - const MergeTreeData & storage; + ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; + + IDataPartStorage & getDataPartStorage() { return *data_part_storage; } + + const String data_part_name; + /// Serializations for every columns and subcolumns by their names. 
+ const SerializationByName serializations; + const MergeTreeIndexGranularityInfo index_granularity_info; + const MergeTreeSettingsPtr storage_settings; const StorageMetadataPtr metadata_snapshot; + const VirtualsDescriptionPtr virtual_columns; const NamesAndTypesList columns_list; const MergeTreeWriterSettings settings; - MergeTreeIndexGranularity index_granularity; const bool with_final_mark; + MutableDataPartStoragePtr data_part_storage; MutableColumns index_columns; + MergeTreeIndexGranularity index_granularity; }; +using MergeTreeDataPartWriterPtr = std::unique_ptr; +using ColumnPositions = std::unordered_map; + +MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( + MergeTreeDataPartType part_type, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, + const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr & virtual_columns_, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity); + } diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index c8d6aa0ba65..89c813ab233 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -7,20 +7,21 @@ namespace DB { IMergedBlockOutputStream::IMergedBlockOutputStream( - const MergeTreeMutableDataPartPtr & data_part, + const MergeTreeSettingsPtr & storage_settings_, + MutableDataPartStoragePtr data_part_storage_, const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list, bool reset_columns_) - : storage(data_part->storage) + : storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) - , data_part_storage(data_part->getDataPartStoragePtr()) + , data_part_storage(data_part_storage_) , reset_columns(reset_columns_) { if (reset_columns) { SerializationInfo::Settings info_settings = { - .ratio_of_defaults_for_sparse = storage.getSettings()->ratio_of_defaults_for_sparse_serialization, + .ratio_of_defaults_for_sparse = storage_settings->ratio_of_defaults_for_sparse_serialization, .choose_kind = false, }; @@ -42,7 +43,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( return {}; for (const auto & column : empty_columns) - LOG_TRACE(storage.log, "Skipping expired/empty column {} for part {}", column, data_part->name); + LOG_TRACE(data_part->storage.log, "Skipping expired/empty column {} for part {}", column, data_part->name); /// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes. 
std::map stream_counts; @@ -91,7 +92,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( } else /// If we have no file in checksums it doesn't exist on disk { - LOG_TRACE(storage.log, "Files {} doesn't exist in checksums so it doesn't exist on disk, will not try to remove it", *itr); + LOG_TRACE(data_part->storage.log, "Files {} doesn't exist in checksums so it doesn't exist on disk, will not try to remove it", *itr); itr = remove_files.erase(itr); } } diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index ca4e3899b29..a9b058418ea 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -1,10 +1,12 @@ #pragma once -#include "Storages/MergeTree/IDataPartStorage.h" +#include +#include #include #include #include #include +#include namespace DB { @@ -13,7 +15,8 @@ class IMergedBlockOutputStream { public: IMergedBlockOutputStream( - const MergeTreeMutableDataPartPtr & data_part, + const MergeTreeSettingsPtr & storage_settings_, + MutableDataPartStoragePtr data_part_storage_, const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list, bool reset_columns_); @@ -39,11 +42,13 @@ protected: SerializationInfoByName & serialization_infos, MergeTreeData::DataPart::Checksums & checksums); - const MergeTreeData & storage; + MergeTreeSettingsPtr storage_settings; + LoggerPtr log; + StorageMetadataPtr metadata_snapshot; MutableDataPartStoragePtr data_part_storage; - IMergeTreeDataPart::MergeTreeWriterPtr writer; + MergeTreeDataPartWriterPtr writer; bool reset_columns = false; SerializationInfoByName new_serialization_infos; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 849240502e4..bd8642b9f66 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -2664,6 +2664,13 @@ BoolMask KeyCondition::checkInHyperrectangle( else if (element.function == RPNElement::FUNCTION_IN_RANGE || element.function == RPNElement::FUNCTION_NOT_IN_RANGE) { + if (element.key_column >= hyperrectangle.size()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Hyperrectangle size is {}, but requested element at position {} ({})", + hyperrectangle.size(), element.key_column, element.toString()); + } + const Range * key_range = &hyperrectangle[element.key_column]; /// The case when the column is wrapped in a chain of possibly monotonic functions. diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index a9109832521..be7e8874b30 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -9,7 +9,7 @@ #include #include #include - +#include #include #include #include @@ -34,6 +34,7 @@ #include #include #include +#include #include namespace DB @@ -378,7 +379,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()), MergeTreeStatisticsFactory::instance().getMany(global_ctx->metadata_snapshot->getColumns()), ctx->compression_codec, - global_ctx->txn, + global_ctx->txn ?
global_ctx->txn->tid : Tx::PrehistoricTID, /*reset_columns=*/ true, ctx->blocks_are_granules_size, global_ctx->context->getWriteSettings()); @@ -596,8 +597,9 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const pipes.emplace_back(std::move(pipe)); } - auto pipe = Pipe::unitePipes(std::move(pipes)); + bool is_result_sparse = global_ctx->new_data_part->getSerialization(column_name)->getKind() == ISerialization::Kind::SPARSE; + auto pipe = Pipe::unitePipes(std::move(pipes)); ctx->rows_sources_read_buf->seek(0, 0); const auto data_settings = global_ctx->data->getSettings(); @@ -606,7 +608,8 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const pipe.numOutputPorts(), *ctx->rows_sources_read_buf, data_settings->merge_max_block_size, - data_settings->merge_max_block_size_bytes); + data_settings->merge_max_block_size_bytes, + is_result_sparse); pipe.addTransform(std::move(transform)); diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index b19c42c8db8..c1514416301 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -41,13 +41,13 @@ struct MergeTreeBlockSizePredictor void update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay = calculateDecay()); /// Return current block size (after update()) - inline size_t getBlockSize() const + size_t getBlockSize() const { return block_size_bytes; } /// Predicts what number of rows should be read to exhaust byte quota per column - inline size_t estimateNumRowsForMaxSizeColumn(size_t bytes_quota) const + size_t estimateNumRowsForMaxSizeColumn(size_t bytes_quota) const { double max_size_per_row = std::max(std::max(max_size_per_row_fixed, 1), max_size_per_row_dynamic); return (bytes_quota > block_size_rows * max_size_per_row) @@ -56,14 +56,14 @@ struct MergeTreeBlockSizePredictor } /// Predicts what number of rows should be read to exhaust byte quota per block - inline size_t estimateNumRows(size_t bytes_quota) const + size_t estimateNumRows(size_t bytes_quota) const { return (bytes_quota > block_size_bytes) ? static_cast((bytes_quota - block_size_bytes) / std::max(1, static_cast(bytes_per_row_current))) : 0; } - inline void updateFilteredRowsRation(size_t rows_was_read, size_t rows_was_filtered, double decay = calculateDecay()) + void updateFilteredRowsRation(size_t rows_was_read, size_t rows_was_filtered, double decay = calculateDecay()) { double alpha = std::pow(1. 
- decay, rows_was_read); double current_ration = rows_was_filtered / std::max(1.0, static_cast(rows_was_read)); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 6f89bb62d62..f32ba3ee0bc 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -188,6 +189,7 @@ namespace ErrorCodes extern const int CANNOT_SCHEDULE_TASK; extern const int LIMIT_EXCEEDED; extern const int CANNOT_FORGET_PARTITION; + extern const int DATA_TYPE_CANNOT_BE_USED_IN_KEY; } static void checkSuspiciousIndices(const ASTFunction * index_function) @@ -3873,7 +3875,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts continue; auto storage_column = columns.getPhysical(part_column.name); - if (!storage_column.type->hasDynamicSubcolumns()) + if (!storage_column.type->hasDynamicSubcolumnsDeprecated()) continue; auto concrete_storage_column = object_columns.getPhysical(part_column.name); @@ -6133,6 +6135,21 @@ bool MergeTreeData::supportsLightweightDelete() const return true; } +bool MergeTreeData::hasProjection() const +{ + auto lock = lockParts(); + for (const auto & part : data_parts_by_info) + { + if (part->getState() == MergeTreeDataPartState::Outdated + || part->getState() == MergeTreeDataPartState::Deleting) + continue; + + if (part->hasProjection()) + return true; + } + return false; +} + MergeTreeData::ProjectionPartsVector MergeTreeData::getAllProjectionPartsVector(MergeTreeData::DataPartStateVector * out_states) const { ProjectionPartsVector res; @@ -8478,7 +8495,7 @@ std::pair MergeTreeData::createE MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), Statistics{}, - compression_codec, txn); + compression_codec, txn ? 
txn->tid : Tx::PrehistoricTID); bool sync_on_insert = settings->fsync_after_insert; @@ -8541,6 +8558,16 @@ void MergeTreeData::unloadPrimaryKeys() } } +void MergeTreeData::verifySortingKey(const KeyDescription & sorting_key) +{ + /// Aggregate functions are already forbidden, but SimpleAggregateFunction is not + for (const auto & data_type : sorting_key.data_types) + { + if (dynamic_cast(data_type->getCustomName())) + throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_KEY, "Column with type {} is not allowed in key expression", data_type->getCustomName()->getName()); + } +} + bool updateAlterConversionsMutations(const MutationCommands & commands, std::atomic & alter_conversions_mutations, bool remove) { for (const auto & command : commands) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 2f9283659e3..fb8f2ec29aa 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -434,10 +434,13 @@ public: bool supportsTTL() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } bool supportsLightweightDelete() const override; + bool hasProjection() const override; + bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; } bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override; @@ -736,6 +739,8 @@ public: const ASTPtr & new_settings, AlterLockHolder & table_lock_holder); + static void verifySortingKey(const KeyDescription & sorting_key); + /// Should be called if part data is suspected to be corrupted. /// Has the ability to check all other parts /// which reside on the same disk of the suspicious part.
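Editor's note (not part of the patch): the new MergeTreeData::verifySortingKey above rejects SimpleAggregateFunction columns in the sorting key; plain AggregateFunction types are already rejected elsewhere. The template argument of the dynamic_cast was lost in the hunk above, so the following is a minimal sketch of the intended check, assuming the custom-name class is DataTypeCustomSimpleAggregateFunction:

    #include <DataTypes/DataTypeCustomSimpleAggregateFunction.h>
    #include <Storages/KeyDescription.h>

    void MergeTreeData::verifySortingKey(const KeyDescription & sorting_key)
    {
        /// Aggregate functions are already forbidden, but SimpleAggregateFunction is not.
        for (const auto & data_type : sorting_key.data_types)
        {
            /// getCustomName() describes types such as SimpleAggregateFunction(sum, UInt64);
            /// reject them in key expressions.
            if (dynamic_cast<const DataTypeCustomSimpleAggregateFunction *>(data_type->getCustomName()))
                throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_KEY,
                    "Column with type {} is not allowed in key expression",
                    data_type->getCustomName()->getName());
        }
    }

The static declaration added to MergeTreeData.h above suggests this is meant to be called from CREATE/ALTER validation of MergeTree tables.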
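Editor's note (not part of the patch): the hunks that follow remove the virtual getWriter() methods from MergeTreeDataPartCompact and MergeTreeDataPartWide and replace them with free factory functions. A minimal sketch of how the createMergeTreeDataPartWriter dispatcher declared earlier presumably ties them together (parameters abbreviated; the full list is the one declared in IMergeTreeDataPartWriter.h):

    /// Illustrative sketch only: "/* args */" stands for the long parameter list declared in
    /// IMergeTreeDataPartWriter.h; the per-format factories receive the same arguments.
    MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter(MergeTreeDataPartType part_type, /* args */)
    {
        if (part_type == MergeTreeDataPartType::Compact)
            return createMergeTreeDataPartCompactWriter(/* args */);
        if (part_type == MergeTreeDataPartType::Wide)
            return createMergeTreeDataPartWideWriter(/* args */);
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown part type: {}", part_type.toString());
    }

This keeps writer construction independent of IMergeTreeDataPart, which is what allows the writers to take a part name, serializations, storage and settings instead of a pointer to the data part itself.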
diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 418b2d8f81b..4a160e5e229 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -47,26 +47,36 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( avg_value_size_hints, profile_callback, CLOCK_MONOTONIC_COARSE); } -IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( +MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, + const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) { NamesAndTypesList ordered_columns_list; std::copy_if(columns_list.begin(), columns_list.end(), std::back_inserter(ordered_columns_list), - [this](const auto & column) { return getColumnPosition(column.name) != std::nullopt; }); + [&column_positions](const auto & column) { return column_positions.contains(column.name); }); /// Order of writing is important in compact format - ordered_columns_list.sort([this](const auto & lhs, const auto & rhs) - { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); + ordered_columns_list.sort([&column_positions](const auto & lhs, const auto & rhs) + { return column_positions.at(lhs.name) < column_positions.at(rhs.name); }); return std::make_unique( - shared_from_this(), ordered_columns_list, metadata_snapshot, - indices_to_recalc, stats_to_recalc_, getMarksFileExtension(), + data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, ordered_columns_list, metadata_snapshot, virtual_columns, + indices_to_recalc, stats_to_recalc_, marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 3a4e7b95f33..1fb84424774 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -40,15 +40,6 @@ public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; - MergeTreeWriterPtr getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) override; - bool isStoredOnDisk() const override { return true; } bool isStoredOnRemoteDisk() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index fc3108e522a..149f86cef00 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ 
b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -53,19 +53,28 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( profile_callback); } -IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter( +MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, + const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) { return std::make_unique( - shared_from_this(), columns_list, - metadata_snapshot, indices_to_recalc, stats_to_recalc_, - getMarksFileExtension(), + data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, + metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 84eeec4211b..7465e08b7c4 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -35,15 +35,6 @@ public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; - MergeTreeWriterPtr getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) override; - bool isStoredOnDisk() const override { return true; } bool isStoredOnRemoteDisk() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 1605e5cdb9a..fb0f0ba9154 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -10,32 +10,41 @@ namespace ErrorCodes } MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( - const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc_, const Statistics & stats_to_recalc, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : MergeTreeDataPartWriterOnDisk(data_part_, columns_list_, metadata_snapshot_, + : MergeTreeDataPartWriterOnDisk( + 
data_part_name_, logger_name_, serializations_, + data_part_storage_, index_granularity_info_, storage_settings_, + columns_list_, metadata_snapshot_, virtual_columns_, indices_to_recalc_, stats_to_recalc, marks_file_extension_, default_codec_, settings_, index_granularity_) - , plain_file(data_part_->getDataPartStorage().writeFile( + , plain_file(getDataPartStorage().writeFile( MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION, settings.max_compress_block_size, settings_.query_write_settings)) , plain_hashing(*plain_file) { - marks_file = data_part_->getDataPartStorage().writeFile( + marks_file = getDataPartStorage().writeFile( MergeTreeDataPartCompact::DATA_FILE_NAME + marks_file_extension_, 4096, settings_.query_write_settings); marks_file_hashing = std::make_unique(*marks_file); - if (data_part_->index_granularity_info.mark_type.compressed) + if (index_granularity_info.mark_type.compressed) { marks_compressor = std::make_unique( *marks_file_hashing, @@ -45,20 +54,35 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( marks_source_hashing = std::make_unique(*marks_compressor); } - auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { - auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); - addStreams(column, compression); + auto compression = getCodecDescOrDefault(column.name, default_codec); + addStreams(column, nullptr, compression); } } -void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, const ASTPtr & effective_codec_desc) +void MergeTreeDataPartWriterCompact::initDynamicStreamsIfNeeded(const Block & block) +{ + if (is_dynamic_streams_initialized) + return; + + is_dynamic_streams_initialized = true; + for (const auto & column : columns_list) + { + if (column.type->hasDynamicSubcolumns()) + { + auto compression = getCodecDescOrDefault(column.name, default_codec); + addStreams(column, block.getByName(column.name).column, compression); + } + } +} + +void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & name_and_type, const ColumnPtr & column, const ASTPtr & effective_codec_desc) { ISerialization::StreamCallback callback = [&](const auto & substream_path) { assert(!substream_path.empty()); - String stream_name = ISerialization::getFileNameForStream(column, substream_path); + String stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path); /// Shared offsets for Nested type. 
if (compressed_streams.contains(stream_name)) @@ -81,7 +105,7 @@ void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, compressed_streams.emplace(stream_name, stream); }; - data_part->getSerialization(column.name)->enumerateStreams(callback, column.type); + getSerialization(name_and_type.name)->enumerateStreams(callback, name_and_type.type, column); } namespace @@ -138,6 +162,7 @@ void writeColumnSingleGranule( serialize_settings.getter = stream_getter; serialize_settings.position_independent_encoding = true; serialize_settings.low_cardinality_max_dictionary_size = 0; + serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX; serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state); serialization->serializeBinaryBulkWithMultipleStreams(*column.column, from_row, number_of_rows, serialize_settings, state); @@ -148,6 +173,9 @@ void writeColumnSingleGranule( void MergeTreeDataPartWriterCompact::write(const Block & block, const IColumn::Permutation * permutation) { + /// On first block of data initialize streams for dynamic subcolumns. + initDynamicStreamsIfNeeded(block); + /// Fill index granularity for this block /// if it's unknown (in case of insert data or horizontal merge, /// but not in case of vertical merge) @@ -230,7 +258,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G writeBinaryLittleEndian(static_cast(0), marks_out); writeColumnSingleGranule( - block.getByName(name_and_type->name), data_part->getSerialization(name_and_type->name), + block.getByName(name_and_type->name), getSerialization(name_and_type->name), stream_getter, granule.start_row, granule.rows_to_write); /// Each type always have at least one substream @@ -241,7 +269,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G } } -void MergeTreeDataPartWriterCompact::fillDataChecksums(IMergeTreeDataPart::Checksums & checksums) +void MergeTreeDataPartWriterCompact::fillDataChecksums(MergeTreeDataPartChecksums & checksums) { if (columns_buffer.size() != 0) { @@ -411,7 +439,7 @@ size_t MergeTreeDataPartWriterCompact::ColumnsBuffer::size() const return accumulated_columns.at(0)->size(); } -void MergeTreeDataPartWriterCompact::fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & /*checksums_to_remove*/) +void MergeTreeDataPartWriterCompact::fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & /*checksums_to_remove*/) { // If we don't have anything to write, skip finalization. 
if (!columns_list.empty()) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index ddb6178dce6..a5527b74e69 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -11,9 +11,15 @@ class MergeTreeDataPartWriterCompact : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterCompact( - const MergeTreeMutableDataPartPtr & data_part, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc, const String & marks_file_extension, @@ -23,12 +29,12 @@ public: void write(const Block & block, const IColumn::Permutation * permutation) override; - void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) override; + void fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) override; void finish(bool sync) override; private: /// Finish serialization of the data. Flush rows in buffer to disk, compute checksums. - void fillDataChecksums(IMergeTreeDataPart::Checksums & checksums); + void fillDataChecksums(MergeTreeDataPartChecksums & checksums); void finishDataSerialization(bool sync); void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) override; @@ -42,7 +48,9 @@ private: void addToChecksums(MergeTreeDataPartChecksums & checksums); - void addStreams(const NameAndTypePair & column, const ASTPtr & effective_codec_desc); + void addStreams(const NameAndTypePair & name_and_type, const ColumnPtr & column, const ASTPtr & effective_codec_desc); + + void initDynamicStreamsIfNeeded(const Block & block); Block header; @@ -96,6 +104,8 @@ private: /// then finally to 'marks_file'. 
std::unique_ptr marks_compressor; std::unique_ptr marks_source_hashing; + + bool is_dynamic_streams_initialized = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 491d2399b82..0a8920790e0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -140,16 +140,24 @@ void MergeTreeDataPartWriterOnDisk::Stream::addToChecksums(Merg MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( - const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr & virtual_columns_, const MergeTreeIndices & indices_to_recalc_, const Statistics & stats_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : IMergeTreeDataPartWriter(data_part_, columns_list_, metadata_snapshot_, settings_, index_granularity_) + : IMergeTreeDataPartWriter( + data_part_name_, serializations_, data_part_storage_, index_granularity_info_, + storage_settings_, columns_list_, metadata_snapshot_, virtual_columns_, settings_, index_granularity_) , skip_indices(indices_to_recalc_) , stats(stats_to_recalc_) , marks_file_extension(marks_file_extension_) @@ -157,14 +165,14 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( , compute_granularity(index_granularity.empty()) , compress_primary_key(settings.compress_primary_key) , execution_stats(skip_indices.size(), stats.size()) - , log(getLogger(storage.getLogName() + " (DataPartWriter)")) + , log(getLogger(logger_name_ + " (DataPartWriter)")) { if (settings.blocks_are_granules_size && !index_granularity.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't take information about index granularity from blocks, when non empty index_granularity array specified"); - if (!data_part->getDataPartStorage().exists()) - data_part->getDataPartStorage().createDirectories(); + if (!getDataPartStorage().exists()) + getDataPartStorage().createDirectories(); if (settings.rewrite_primary_key) initPrimaryIndex(); @@ -223,7 +231,6 @@ static size_t computeIndexGranularityImpl( size_t MergeTreeDataPartWriterOnDisk::computeIndexGranularity(const Block & block) const { - const auto storage_settings = storage.getSettings(); return computeIndexGranularityImpl( block, storage_settings->index_granularity_bytes, @@ -237,7 +244,7 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() if (metadata_snapshot->hasPrimaryKey()) { String index_name = "primary" + getIndexExtension(compress_primary_key); - index_file_stream = data_part->getDataPartStorage().writeFile(index_name, DBMS_DEFAULT_BUFFER_SIZE, settings.query_write_settings); + index_file_stream = getDataPartStorage().writeFile(index_name, DBMS_DEFAULT_BUFFER_SIZE, settings.query_write_settings); index_file_hashing_stream = std::make_unique(*index_file_stream); if (compress_primary_key) @@ -256,7 +263,7 @@ void MergeTreeDataPartWriterOnDisk::initStatistics() String stats_name = stat_ptr->getFileName(); stats_streams.emplace_back(std::make_unique>( 
stats_name, - data_part->getDataPartStoragePtr(), + data_part_storage, stats_name, STAT_FILE_SUFFIX, default_codec, settings.max_compress_block_size, settings.query_write_settings)); @@ -275,7 +282,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() skip_indices_streams.emplace_back( std::make_unique>( stream_name, - data_part->getDataPartStoragePtr(), + data_part_storage, stream_name, skip_index->getSerializedFileExtension(), stream_name, marks_file_extension, default_codec, settings.max_compress_block_size, @@ -285,7 +292,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() GinIndexStorePtr store = nullptr; if (typeid_cast(&*skip_index) != nullptr) { - store = std::make_shared(stream_name, data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(), storage.getSettings()->max_digestion_size_per_segment); + store = std::make_shared(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); @@ -498,7 +505,7 @@ void MergeTreeDataPartWriterOnDisk::finishStatisticsSerialization(bool sync) } for (size_t i = 0; i < stats.size(); ++i) - LOG_DEBUG(log, "Spent {} ms calculating statistics {} for the part {}", execution_stats.statistics_build_us[i] / 1000, stats[i]->columnName(), data_part->name); + LOG_DEBUG(log, "Spent {} ms calculating statistics {} for the part {}", execution_stats.statistics_build_us[i] / 1000, stats[i]->columnName(), data_part_name); } void MergeTreeDataPartWriterOnDisk::fillStatisticsChecksums(MergeTreeData::DataPart::Checksums & checksums) @@ -524,7 +531,7 @@ void MergeTreeDataPartWriterOnDisk::finishSkipIndicesSerialization(bool sync) store.second->finalize(); for (size_t i = 0; i < skip_indices.size(); ++i) - LOG_DEBUG(log, "Spent {} ms calculating index {} for the part {}", execution_stats.skip_indices_build_us[i] / 1000, skip_indices[i]->index.name, data_part->name); + LOG_DEBUG(log, "Spent {} ms calculating index {} for the part {}", execution_stats.skip_indices_build_us[i] / 1000, skip_indices[i]->index.name, data_part_name); gin_index_stores.clear(); skip_indices_streams.clear(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 9f2cc3970fa..0c31cabc8c4 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -5,9 +5,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -97,16 +94,22 @@ public: void sync() const; - void addToChecksums(IMergeTreeDataPart::Checksums & checksums); + void addToChecksums(MergeTreeDataPartChecksums & checksums); }; using StreamPtr = std::unique_ptr>; using StatisticStreamPtr = std::unique_ptr>; MergeTreeDataPartWriterOnDisk( - const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, @@ -133,13 +136,13 @@ protected: void 
calculateAndSerializeStatistics(const Block & stats_block); /// Finishes primary index serialization: write final primary index row (if required) and compute checksums - void fillPrimaryIndexChecksums(MergeTreeData::DataPart::Checksums & checksums); + void fillPrimaryIndexChecksums(MergeTreeDataPartChecksums & checksums); void finishPrimaryIndexSerialization(bool sync); /// Finishes skip indices serialization: write all accumulated data to disk and compute checksums - void fillSkipIndicesChecksums(MergeTreeData::DataPart::Checksums & checksums); + void fillSkipIndicesChecksums(MergeTreeDataPartChecksums & checksums); void finishSkipIndicesSerialization(bool sync); - void fillStatisticsChecksums(MergeTreeData::DataPart::Checksums & checksums); + void fillStatisticsChecksums(MergeTreeDataPartChecksums & checksums); void finishStatisticsSerialization(bool sync); /// Get global number of the current which we are writing (or going to start to write) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 6a3b08d4d65..afa14d8a98a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -76,37 +76,62 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, } MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( - const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc_, const Statistics & stats_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : MergeTreeDataPartWriterOnDisk(data_part_, columns_list_, metadata_snapshot_, - indices_to_recalc_, stats_to_recalc_, marks_file_extension_, - default_codec_, settings_, index_granularity_) + : MergeTreeDataPartWriterOnDisk( + data_part_name_, logger_name_, serializations_, + data_part_storage_, index_granularity_info_, storage_settings_, + columns_list_, metadata_snapshot_, virtual_columns_, + indices_to_recalc_, stats_to_recalc_, marks_file_extension_, + default_codec_, settings_, index_granularity_) { - auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { - auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); - addStreams(column, compression); + auto compression = getCodecDescOrDefault(column.name, default_codec); + addStreams(column, nullptr, compression); + } +} + +void MergeTreeDataPartWriterWide::initDynamicStreamsIfNeeded(const DB::Block & block) +{ + if (is_dynamic_streams_initialized) + return; + + is_dynamic_streams_initialized = true; + block_sample = block.cloneEmpty(); + for (const auto & column : columns_list) + { + if (column.type->hasDynamicSubcolumns()) + { + auto compression = getCodecDescOrDefault(column.name, default_codec); + addStreams(column, block_sample.getByName(column.name).column, compression); + } } } void MergeTreeDataPartWriterWide::addStreams( - const NameAndTypePair 
& column, + const NameAndTypePair & name_and_type, + const ColumnPtr & column, const ASTPtr & effective_codec_desc) { ISerialization::StreamCallback callback = [&](const auto & substream_path) { assert(!substream_path.empty()); - auto storage_settings = storage.getSettings(); - auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); + auto full_stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path); String stream_name; if (storage_settings->replace_long_file_name_to_hash && full_stream_name.size() > storage_settings->max_file_name_length) @@ -114,6 +139,10 @@ void MergeTreeDataPartWriterWide::addStreams( else stream_name = full_stream_name; + /// Shared offsets for Nested type. + if (column_streams.contains(stream_name)) + return; + auto it = stream_name_to_full_name.find(stream_name); if (it != stream_name_to_full_name.end() && it->second != full_stream_name) throw Exception(ErrorCodes::INCORRECT_FILE_NAME, @@ -121,10 +150,6 @@ void MergeTreeDataPartWriterWide::addStreams( " It is a collision between a filename for one column and a hash of filename for another column or a bug", stream_name, it->second, full_stream_name); - /// Shared offsets for Nested type. - if (column_streams.contains(stream_name)) - return; - const auto & subtype = substream_path.back().data.type; CompressionCodecPtr compression_codec; @@ -138,7 +163,7 @@ void MergeTreeDataPartWriterWide::addStreams( auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); - const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage()); + const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), name_and_type.getNameInStorage()); UInt64 max_compress_block_size = 0; if (column_desc) @@ -149,7 +174,7 @@ void MergeTreeDataPartWriterWide::addStreams( column_streams[stream_name] = std::make_unique>( stream_name, - data_part->getDataPartStoragePtr(), + data_part_storage, stream_name, DATA_FILE_EXTENSION, stream_name, marks_file_extension, compression_codec, @@ -163,7 +188,7 @@ void MergeTreeDataPartWriterWide::addStreams( }; ISerialization::SubstreamPath path; - data_part->getSerialization(column.name)->enumerateStreams(callback, column.type); + getSerialization(name_and_type.name)->enumerateStreams(callback, name_and_type.type, column); } const String & MergeTreeDataPartWriterWide::getStreamName( @@ -222,6 +247,9 @@ void MergeTreeDataPartWriterWide::shiftCurrentMark(const Granules & granules_wri void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Permutation * permutation) { + /// On first block of data initialize streams for dynamic subcolumns. 
+ initDynamicStreamsIfNeeded(block); + /// Fill index granularity for this block /// if it's unknown (in case of insert data or horizontal merge, /// but not in case of vertical part of vertical merge) @@ -264,7 +292,7 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm { auto & column = block_to_write.getByName(it->name); - if (data_part->getSerialization(it->name)->getKind() != ISerialization::Kind::SPARSE) + if (getSerialization(it->name)->getKind() != ISerialization::Kind::SPARSE) column.column = recursiveRemoveSparse(column.column); if (permutation) @@ -302,11 +330,12 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm } void MergeTreeDataPartWriterWide::writeSingleMark( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, WrittenOffsetColumns & offset_columns, size_t number_of_rows) { - StreamsWithMarks marks = getCurrentMarksForColumn(column, offset_columns); + auto * sample_column = block_sample.findByName(name_and_type.name); + StreamsWithMarks marks = getCurrentMarksForColumn(name_and_type, sample_column ? sample_column->column : nullptr, offset_columns); for (const auto & mark : marks) flushMarkToFile(mark, number_of_rows); } @@ -323,21 +352,22 @@ void MergeTreeDataPartWriterWide::flushMarkToFile(const StreamNameAndMark & stre } StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, + const ColumnPtr & column_sample, WrittenOffsetColumns & offset_columns) { StreamsWithMarks result; - const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage()); + const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), name_and_type.getNameInStorage()); UInt64 min_compress_block_size = 0; if (column_desc) if (const auto * value = column_desc->settings.tryGet("min_compress_block_size")) min_compress_block_size = value->safeGet(); if (!min_compress_block_size) min_compress_block_size = settings.min_compress_block_size; - data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) + getSerialization(name_and_type.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; - auto stream_name = getStreamName(column, substream_path); + auto stream_name = getStreamName(name_and_type, substream_path); /// Don't write offsets more than one time for Nested type. 
if (is_offsets && offset_columns.contains(stream_name)) @@ -355,7 +385,7 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( stream_with_mark.mark.offset_in_decompressed_block = stream.compressed_hashing.offset(); result.push_back(stream_with_mark); - }); + }, name_and_type.type, column_sample); return result; } @@ -368,7 +398,7 @@ void MergeTreeDataPartWriterWide::writeSingleGranule( ISerialization::SerializeBinaryBulkSettings & serialize_settings, const Granule & granule) { - const auto & serialization = data_part->getSerialization(name_and_type.name); + const auto & serialization = getSerialization(name_and_type.name); serialization->serializeBinaryBulkWithMultipleStreams(column, granule.start_row, granule.rows_to_write, serialize_settings, serialization_state); /// So that instead of the marks pointing to the end of the compressed block, there were marks pointing to the beginning of the next one. @@ -382,7 +412,7 @@ void MergeTreeDataPartWriterWide::writeSingleGranule( return; column_streams.at(stream_name)->compressed_hashing.nextIfAtEnd(); - }); + }, name_and_type.type, column.getPtr()); } /// Column must not be empty. (column.size() !== 0) @@ -398,7 +428,7 @@ void MergeTreeDataPartWriterWide::writeColumn( const auto & [name, type] = name_and_type; auto [it, inserted] = serialization_states.emplace(name, nullptr); - auto serialization = data_part->getSerialization(name_and_type.name); + auto serialization = getSerialization(name_and_type.name); if (inserted) { @@ -407,11 +437,10 @@ void MergeTreeDataPartWriterWide::writeColumn( serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second); } - const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); - serialize_settings.low_cardinality_max_dictionary_size = global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; for (const auto & granule : granules) { @@ -424,7 +453,7 @@ void MergeTreeDataPartWriterWide::writeColumn( "We have to add new mark for column, but already have non written mark. 
" "Current mark {}, total marks {}, offset {}", getCurrentMark(), index_granularity.getMarksCount(), rows_written_in_last_mark); - last_non_written_marks[name] = getCurrentMarksForColumn(name_and_type, offset_columns); + last_non_written_marks[name] = getCurrentMarksForColumn(name_and_type, column.getPtr(), offset_columns); } writeSingleGranule( @@ -453,14 +482,14 @@ void MergeTreeDataPartWriterWide::writeColumn( bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (is_offsets) offset_columns.insert(getStreamName(name_and_type, substream_path)); - }); + }, name_and_type.type, column.getPtr()); } void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePair & name_type) { const auto & [name, type] = name_type; - const auto & serialization = data_part->getSerialization(name_type.name); + const auto & serialization = getSerialization(name_type.name); if (!type->isValueRepresentedByNumber() || type->haveSubtypes() || serialization->getKind() != ISerialization::Kind::DEFAULT) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot validate column of non fixed type {}", type->getName()); @@ -470,21 +499,21 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai String bin_path = escaped_name + DATA_FILE_EXTENSION; /// Some columns may be removed because of ttl. Skip them. - if (!data_part->getDataPartStorage().exists(mrk_path)) + if (!getDataPartStorage().exists(mrk_path)) return; - auto mrk_file_in = data_part->getDataPartStorage().readFile(mrk_path, {}, std::nullopt, std::nullopt); + auto mrk_file_in = getDataPartStorage().readFile(mrk_path, {}, std::nullopt, std::nullopt); std::unique_ptr mrk_in; - if (data_part->index_granularity_info.mark_type.compressed) + if (index_granularity_info.mark_type.compressed) mrk_in = std::make_unique(std::move(mrk_file_in)); else mrk_in = std::move(mrk_file_in); - DB::CompressedReadBufferFromFile bin_in(data_part->getDataPartStorage().readFile(bin_path, {}, std::nullopt, std::nullopt)); + DB::CompressedReadBufferFromFile bin_in(getDataPartStorage().readFile(bin_path, {}, std::nullopt, std::nullopt)); bool must_be_last = false; UInt64 offset_in_compressed_file = 0; UInt64 offset_in_decompressed_block = 0; - UInt64 index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; + UInt64 index_granularity_rows = index_granularity_info.fixed_index_granularity; size_t mark_num; @@ -500,7 +529,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai if (settings.can_use_adaptive_granularity) readBinaryLittleEndian(index_granularity_rows, *mrk_in); else - index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; + index_granularity_rows = index_granularity_info.fixed_index_granularity; if (must_be_last) { @@ -533,7 +562,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{}" " (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", - data_part->getDataPartStorage().getFullPath(), + getDataPartStorage().getFullPath(), mark_num, offset_in_compressed_file, offset_in_decompressed_block, index_granularity.getMarkRows(mark_num), index_granularity_rows, index_granularity.getMarksCount()); @@ -591,15 +620,13 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai " index granularity size {}, last rows {}", column->size(), mark_num, 
index_granularity.getMarksCount(), index_granularity_rows); } - } -void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) +void MergeTreeDataPartWriterWide::fillDataChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) { - const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; - serialize_settings.low_cardinality_max_dictionary_size = global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; WrittenOffsetColumns offset_columns; if (rows_written_in_last_mark > 0) { @@ -622,7 +649,8 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksum if (!serialization_states.empty()) { serialize_settings.getter = createStreamGetter(*it, written_offset_columns ? *written_offset_columns : offset_columns); - data_part->getSerialization(it->name)->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[it->name]); + serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::SUFFIX; + getSerialization(it->name)->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[it->name]); } if (write_final_mark) @@ -665,7 +693,7 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(bool sync) { if (column.type->isValueRepresentedByNumber() && !column.type->haveSubtypes() - && data_part->getSerialization(column.name)->getKind() == ISerialization::Kind::DEFAULT) + && getSerialization(column.name)->getKind() == ISerialization::Kind::DEFAULT) { validateColumnOfFixedSize(column); } @@ -674,7 +702,7 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(bool sync) } -void MergeTreeDataPartWriterWide::fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) +void MergeTreeDataPartWriterWide::fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) { // If we don't have anything to write, skip finalization. 
if (!columns_list.empty()) @@ -703,17 +731,17 @@ void MergeTreeDataPartWriterWide::finish(bool sync) } void MergeTreeDataPartWriterWide::writeFinalMark( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, WrittenOffsetColumns & offset_columns) { - writeSingleMark(column, offset_columns, 0); + writeSingleMark(name_and_type, offset_columns, 0); /// Memoize information about offsets - data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) + getSerialization(name_and_type.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (is_offsets) - offset_columns.insert(getStreamName(column, substream_path)); - }); + offset_columns.insert(getStreamName(name_and_type, substream_path)); + }, name_and_type.type, block_sample.getByName(name_and_type.name).column); } static void fillIndexGranularityImpl( diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index f5ff323563d..9d18ac76880 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -21,9 +21,15 @@ class MergeTreeDataPartWriterWide : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterWide( - const MergeTreeMutableDataPartPtr & data_part, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, @@ -33,14 +39,14 @@ public: void write(const Block & block, const IColumn::Permutation * permutation) override; - void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) final; + void fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) final; void finish(bool sync) final; private: /// Finish serialization of data: write final mark if required and compute checksums /// Also validate written data in debug mode - void fillDataChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove); + void fillDataChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove); void finishDataSerialization(bool sync); /// Write data of one column. 
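Editor's note (not part of the patch): both the compact and wide writers now postpone stream creation for columns with dynamic subcolumns until the first block arrives, since the set of substreams depends on the data itself. A condensed sketch of that call pattern, using the member names visible in the hunks above (simplified, not a verbatim copy):

    /// Constructor: create the streams that can be enumerated without data (null column sample).
    MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide(/* ... */)
    {
        for (const auto & column : columns_list)
            addStreams(column, /*column=*/ nullptr, getCodecDescOrDefault(column.name, default_codec));
    }

    /// Called from write(): revisit columns with dynamic subcolumns once a sample column exists.
    void MergeTreeDataPartWriterWide::initDynamicStreamsIfNeeded(const Block & block)
    {
        if (is_dynamic_streams_initialized)
            return;                             /// no-op for every block after the first

        is_dynamic_streams_initialized = true;
        block_sample = block.cloneEmpty();      /// kept for writing marks of dynamic streams later

        for (const auto & column : columns_list)
            if (column.type->hasDynamicSubcolumns())
                addStreams(column, block_sample.getByName(column.name).column, getCodecDescOrDefault(column.name, default_codec));
    }

addStreams() skips stream names it has already registered (see the contains() checks above), so the second pass only adds the data-dependent substreams.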
@@ -63,7 +69,8 @@ private: /// Take offsets from column and return as MarkInCompressed file with stream name StreamsWithMarks getCurrentMarksForColumn( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, + const ColumnPtr & column_sample, WrittenOffsetColumns & offset_columns); /// Write mark to disk using stream and rows count @@ -73,18 +80,21 @@ private: /// Write mark for column taking offsets from column stream void writeSingleMark( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, WrittenOffsetColumns & offset_columns, size_t number_of_rows); void writeFinalMark( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, WrittenOffsetColumns & offset_columns); void addStreams( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, + const ColumnPtr & column, const ASTPtr & effective_codec_desc); + void initDynamicStreamsIfNeeded(const Block & block); + /// Method for self check (used in debug-build only). Checks that written /// data and corresponding marks are consistent. Otherwise throws logical /// errors. @@ -129,6 +139,10 @@ private: /// How many rows we have already written in the current mark. /// More than zero when incoming blocks are smaller then their granularity. size_t rows_written_in_last_mark = 0; + + Block block_sample; + + bool is_dynamic_streams_initialized = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index daa163d741c..426e36ce9a9 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -422,7 +422,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( auto columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames()); for (auto & column : columns) - if (column.type->hasDynamicSubcolumns()) + if (column.type->hasDynamicSubcolumnsDeprecated()) column.type = block.getByName(column.name).type; auto minmax_idx = std::make_shared(); @@ -600,7 +600,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( indices, MergeTreeStatisticsFactory::instance().getMany(metadata_snapshot->getColumns()), compression_codec, - context->getCurrentTransaction(), + context->getCurrentTransaction() ? context->getCurrentTransaction()->tid : Tx::PrehistoricTID, false, false, context->getWriteSettings()); @@ -738,7 +738,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( MergeTreeIndices{}, Statistics{}, /// TODO(hanfei): It should be helpful to write statistics for projection result. 
compression_codec, - NO_TRANSACTION_PTR, + Tx::PrehistoricTID, false, false, data.getContext()->getWriteSettings()); out->writeWithPermutation(block, perm_ptr);
diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index 562bfe7c439..a9125b4047e 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -74,6 +74,8 @@ struct MergeTreeWriterSettings , blocks_are_granules_size(blocks_are_granules_size_) , query_write_settings(query_write_settings_) , max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation) + , low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size) + , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part != 0) { } @@ -93,6 +95,9 @@ struct MergeTreeWriterSettings WriteSettings query_write_settings; size_t max_threads_for_annoy_index_creation; + + size_t low_cardinality_max_dictionary_size; + bool low_cardinality_use_single_dictionary_for_part; }; }
diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h index 85006c3ffde..87445c99ade 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h @@ -64,8 +64,8 @@ public: std::string describe() const; }; -constexpr inline auto getNonAdaptiveMrkSizeWide() { return sizeof(UInt64) * 2; } -constexpr inline auto getAdaptiveMrkSizeWide() { return sizeof(UInt64) * 3; } +constexpr auto getNonAdaptiveMrkSizeWide() { return sizeof(UInt64) * 2; } +constexpr auto getAdaptiveMrkSizeWide() { return sizeof(UInt64) * 3; } inline size_t getAdaptiveMrkSizeCompact(size_t columns_num); }
diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 3e5cbb34556..86319796435 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -113,7 +113,7 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd ISerialization::DeserializeBinaryBulkStatePtr state; auto serialization = elem.type->getDefaultSerialization(); - serialization->deserializeBinaryBulkStatePrefix(settings, state); + serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr); serialization->deserializeBinaryBulkWithMultipleStreams(elem.column, rows_to_read, settings, state, nullptr); } }
diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index cd18e8d5a28..b240f80ee13 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -413,12 +413,12 @@ void MergeTreePartition::load(const MergeTreeData & storage, const PartMetadataM partition_key_sample.getByPosition(i).type->getDefaultSerialization()->deserializeBinary(value[i], *file, {}); } -std::unique_ptr<WriteBufferFromFileBase> MergeTreePartition::store(const MergeTreeData & storage, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const +std::unique_ptr<WriteBufferFromFileBase> MergeTreePartition::store( + StorageMetadataPtr metadata_snapshot, ContextPtr storage_context, + IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const { - auto metadata_snapshot = storage.getInMemoryMetadataPtr(); - const auto & context = storage.getContext(); - const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage.getContext()).sample_block; - return store(partition_key_sample, data_part_storage, checksums, context->getWriteSettings()); + const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage_context).sample_block; + return store(partition_key_sample, data_part_storage, checksums, storage_context->getWriteSettings()); } std::unique_ptr<WriteBufferFromFileBase> MergeTreePartition::store(const Block & partition_key_sample, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const
diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index 78b141f26ec..44def70bdd9 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -44,7 +44,9 @@ public: /// Store functions return write buffer with written but not finalized data. /// User must call finish() for returned object. - [[nodiscard]] std::unique_ptr<WriteBufferFromFileBase> store(const MergeTreeData & storage, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const; + [[nodiscard]] std::unique_ptr<WriteBufferFromFileBase> store( + StorageMetadataPtr metadata_snapshot, ContextPtr storage_context, + IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const; [[nodiscard]] std::unique_ptr<WriteBufferFromFileBase> store(const Block & partition_key_sample, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const; void assign(const MergeTreePartition & other) { value = other.value; }
diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 34fb214a1ce..a2b8f0ad96f 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -196,7 +196,7 @@ void MergeTreeReaderCompact::readPrefix( deserialize_settings.getter = buffer_getter_for_prefix; ISerialization::DeserializeBinaryBulkStatePtr state_for_prefix; - serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix); + serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix, nullptr); } SerializationPtr serialization; @@ -206,7 +206,8 @@ void MergeTreeReaderCompact::readPrefix( serialization = getSerializationInPart(name_and_type); deserialize_settings.getter = buffer_getter; - serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name_and_type.name]); + deserialize_settings.dynamic_read_statistics = true; + serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name_and_type.name], nullptr); } catch (Exception & e) {
diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 59feb4dda19..b6882fdced9 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -43,6 +43,8 @@ MergeTreeReaderWide::MergeTreeReaderWide( mark_ranges_, settings_, avg_value_size_hints_) + , profile_callback(profile_callback_) + , clock_type(clock_type_) , read_without_marks( settings.can_read_part_without_marks && all_mark_ranges.isOneRangeForWholePart(data_part_info_for_read->getMarksCount())) @@ -50,7 +52,7 @@ MergeTreeReaderWide::MergeTreeReaderWide( try { for (size_t i = 0; i < columns_to_read.size(); ++i) - addStreams(columns_to_read[i],
serializations[i]); } catch (...) { @@ -103,9 +105,10 @@ void MergeTreeReaderWide::prefetchForAllColumns( try { auto & cache = caches[columns_to_read[pos].getNameInStorage()]; + auto & deserialize_states_cache = deserialize_states_caches[columns_to_read[pos].getNameInStorage()]; prefetchForColumn( priority, columns_to_read[pos], serializations[pos], from_mark, continue_reading, - current_task_last_mark, cache); + current_task_last_mark, cache, deserialize_states_cache); } catch (Exception & e) { @@ -150,11 +153,12 @@ size_t MergeTreeReaderWide::readRows( { size_t column_size_before_reading = column->size(); auto & cache = caches[column_to_read.getNameInStorage()]; + auto & deserialize_states_cache = deserialize_states_caches[column_to_read.getNameInStorage()]; readData( column_to_read, serializations[pos], column, from_mark, continue_reading, current_task_last_mark, - max_rows_to_read, cache, /* was_prefetched =*/ !prefetched_streams.empty()); + max_rows_to_read, cache, deserialize_states_cache, /* was_prefetched =*/ !prefetched_streams.empty()); /// For elements of Nested, column_size_before_reading may be greater than column size /// if offsets are not empty and were already read, but elements are empty. @@ -202,9 +206,7 @@ size_t MergeTreeReaderWide::readRows( void MergeTreeReaderWide::addStreams( const NameAndTypePair & name_and_type, - const SerializationPtr & serialization, - const ReadBufferFromFileBase::ProfileCallback & profile_callback, - clockid_t clock_type) + const SerializationPtr & serialization) { bool has_any_stream = false; bool has_all_streams = true; @@ -228,43 +230,8 @@ void MergeTreeReaderWide::addStreams( return; } - auto context = data_part_info_for_read->getContext(); - auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
&context->getLoadMarksThreadpool() : nullptr; - size_t num_marks_in_part = data_part_info_for_read->getMarksCount(); - - auto marks_loader = std::make_shared( - data_part_info_for_read, - mark_cache, - data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(*stream_name), - num_marks_in_part, - data_part_info_for_read->getIndexGranularityInfo(), - settings.save_marks_in_cache, - settings.read_settings, - load_marks_threadpool, - /*num_columns_in_mark=*/ 1); - + addStream(substream_path, *stream_name); has_any_stream = true; - auto stream_settings = settings; - stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys; - - auto create_stream = [&]() - { - return std::make_unique( - data_part_info_for_read->getDataPartStorage(), *stream_name, DATA_FILE_EXTENSION, - num_marks_in_part, all_mark_ranges, stream_settings, - uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), - std::move(marks_loader), profile_callback, clock_type); - }; - - if (read_without_marks) - { - streams.emplace(*stream_name, create_stream.operator()()); - } - else - { - marks_loader->startAsyncLoad(); - streams.emplace(*stream_name, create_stream.operator()()); - } }; serialization->enumerateStreams(callback); @@ -273,11 +240,46 @@ void MergeTreeReaderWide::addStreams( partially_read_columns.insert(name_and_type.name); } -static ReadBuffer * getStream( +MergeTreeReaderWide::FileStreams::iterator MergeTreeReaderWide::addStream(const ISerialization::SubstreamPath & substream_path, const String & stream_name) +{ + auto context = data_part_info_for_read->getContext(); + auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
&context->getLoadMarksThreadpool() : nullptr; + size_t num_marks_in_part = data_part_info_for_read->getMarksCount(); + + auto marks_loader = std::make_shared( + data_part_info_for_read, + mark_cache, + data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(stream_name), + num_marks_in_part, + data_part_info_for_read->getIndexGranularityInfo(), + settings.save_marks_in_cache, + settings.read_settings, + load_marks_threadpool, + /*num_columns_in_mark=*/ 1); + + auto stream_settings = settings; + stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys; + + auto create_stream = [&]() + { + return std::make_unique( + data_part_info_for_read->getDataPartStorage(), stream_name, DATA_FILE_EXTENSION, + num_marks_in_part, all_mark_ranges, stream_settings, + uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(stream_name + DATA_FILE_EXTENSION), + std::move(marks_loader), profile_callback, clock_type); + }; + + if (read_without_marks) + return streams.emplace(stream_name, create_stream.operator()()).first; + + marks_loader->startAsyncLoad(); + return streams.emplace(stream_name, create_stream.operator()()).first; +} + +ReadBuffer * MergeTreeReaderWide::getStream( bool seek_to_start, const ISerialization::SubstreamPath & substream_path, const MergeTreeDataPartChecksums & checksums, - MergeTreeReaderWide::FileStreams & streams, const NameAndTypePair & name_and_type, size_t from_mark, bool seek_to_mark, @@ -294,7 +296,13 @@ static ReadBuffer * getStream( auto it = streams.find(*stream_name); if (it == streams.end()) - return nullptr; + { + /// If we didn't create requested stream, but file with this path exists, create a stream for it. + /// It may happen during reading of columns with dynamic subcolumns, because all streams are known + /// only after deserializing of binary bulk prefix. 
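For illustration, a minimal self-contained sketch of the get-or-create pattern that the comment above describes; Stream, StreamMap and getOrCreateStream are hypothetical stand-ins for MergeTreeReaderWide's streams map and addStream(), not code from this change.

#include <map>
#include <memory>
#include <string>

struct Stream
{
    explicit Stream(std::string name_) : name(std::move(name_)) {}
    std::string name;
};

using StreamMap = std::map<std::string, std::unique_ptr<Stream>>;

/// With dynamic subcolumns the full set of substreams is known only after the
/// binary bulk prefix has been deserialized, so streams cannot all be created
/// up front; names that were not seen in addStreams() are created on first access.
Stream & getOrCreateStream(StreamMap & streams, const std::string & stream_name)
{
    auto it = streams.find(stream_name);
    if (it == streams.end())
        it = streams.emplace(stream_name, std::make_unique<Stream>(stream_name)).first;
    return *it->second;
}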
+ + it = addStream(substream_path, *stream_name); + } MergeTreeReaderStream & stream = *it->second; stream.adjustRightMark(current_task_last_mark); @@ -311,17 +319,19 @@ void MergeTreeReaderWide::deserializePrefix( const SerializationPtr & serialization, const NameAndTypePair & name_and_type, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache) + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache) { const auto & name = name_and_type.name; if (!deserialize_binary_bulk_state_map.contains(name)) { ISerialization::DeserializeBinaryBulkSettings deserialize_settings; + deserialize_settings.dynamic_read_statistics = true; deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { - return getStream(/* seek_to_start = */true, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, cache); + return getStream(/* seek_to_start = */true, substream_path, data_part_info_for_read->getChecksums(), name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, cache); }; - serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name]); + serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name], &deserialize_states_cache); } } @@ -332,38 +342,48 @@ void MergeTreeReaderWide::prefetchForColumn( size_t from_mark, bool continue_reading, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache) + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache) { - deserializePrefix(serialization, name_and_type, current_task_last_mark, cache); - - serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + deserializePrefix(serialization, name_and_type, current_task_last_mark, cache, deserialize_states_cache); + auto callback = [&](const ISerialization::SubstreamPath & substream_path) { auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(name_and_type, substream_path, data_part_info_for_read->getChecksums()); if (stream_name && !prefetched_streams.contains(*stream_name)) { bool seek_to_mark = !continue_reading && !read_without_marks; - - if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) + if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) { buf->prefetch(priority); prefetched_streams.insert(*stream_name); } } - }); + }; + + auto data = ISerialization::SubstreamData(serialization).withType(name_and_type.type).withDeserializeState(deserialize_binary_bulk_state_map[name_and_type.name]); + ISerialization::EnumerateStreamsSettings settings; + serialization->enumerateStreams(settings, callback, data); } void MergeTreeReaderWide::readData( - const NameAndTypePair & name_and_type, const SerializationPtr & serialization, ColumnPtr & column, - size_t from_mark, bool continue_reading, size_t current_task_last_mark, - size_t max_rows_to_read, ISerialization::SubstreamsCache & cache, bool was_prefetched) + const NameAndTypePair & name_and_type, + const SerializationPtr & serialization, + ColumnPtr & column, + size_t from_mark, + bool continue_reading, + size_t 
current_task_last_mark, + size_t max_rows_to_read, + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache, + bool was_prefetched) { double & avg_value_size_hint = avg_value_size_hints[name_and_type.name]; ISerialization::DeserializeBinaryBulkSettings deserialize_settings; deserialize_settings.avg_value_size_hint = avg_value_size_hint; - deserializePrefix(serialization, name_and_type, current_task_last_mark, cache); + deserializePrefix(serialization, name_and_type, current_task_last_mark, cache, deserialize_states_cache); deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { @@ -371,7 +391,7 @@ void MergeTreeReaderWide::readData( return getStream( /* seek_to_start = */false, substream_path, - data_part_info_for_read->getChecksums(), streams, + data_part_info_for_read->getChecksums(), name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache); }; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index 9f6bdd79b00..841c2dc567d 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -45,14 +45,31 @@ private: void addStreams( const NameAndTypePair & name_and_type, - const SerializationPtr & serialization, - const ReadBufferFromFileBase::ProfileCallback & profile_callback, - clockid_t clock_type); + const SerializationPtr & serialization); + + ReadBuffer * getStream( + bool seek_to_start, + const ISerialization::SubstreamPath & substream_path, + const MergeTreeDataPartChecksums & checksums, + const NameAndTypePair & name_and_type, + size_t from_mark, + bool seek_to_mark, + size_t current_task_last_mark, + ISerialization::SubstreamsCache & cache); + + FileStreams::iterator addStream(const ISerialization::SubstreamPath & substream_path, const String & stream_name); void readData( - const NameAndTypePair & name_and_type, const SerializationPtr & serialization, ColumnPtr & column, - size_t from_mark, bool continue_reading, size_t current_task_last_mark, size_t max_rows_to_read, - ISerialization::SubstreamsCache & cache, bool was_prefetched); + const NameAndTypePair & name_and_type, + const SerializationPtr & serialization, + ColumnPtr & column, + size_t from_mark, + bool continue_reading, + size_t current_task_last_mark, + size_t max_rows_to_read, + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache, + bool was_prefetched); /// Make next readData more simple by calling 'prefetch' of all related ReadBuffers (column streams). 
void prefetchForColumn( @@ -62,17 +79,22 @@ private: size_t from_mark, bool continue_reading, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache); + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache); void deserializePrefix( const SerializationPtr & serialization, const NameAndTypePair & name_and_type, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache); + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache); std::unordered_map caches; + std::unordered_map deserialize_states_caches; std::unordered_set prefetched_streams; ssize_t prefetched_from_mark = -1; + ReadBufferFromFileBase::ProfileCallback profile_callback; + clockid_t clock_type; bool read_without_marks = false; }; diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index fce733d47b7..78b67de1a7e 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -26,14 +26,12 @@ namespace ErrorCodes MergeTreeSelectProcessor::MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, - const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, const MergeTreeReaderSettings & reader_settings_) : pool(std::move(pool_)) , algorithm(std::move(algorithm_)) - , storage_snapshot(storage_snapshot_) , prewhere_info(prewhere_info_) , actions_settings(actions_settings_) , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps)) diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 6b663e0fd36..8f41f5deacb 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -41,7 +41,6 @@ public: MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, - const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, @@ -71,7 +70,6 @@ private: const MergeTreeReadPoolPtr pool; const MergeTreeSelectAlgorithmPtr algorithm; - const StorageSnapshotPtr storage_snapshot; const PrewhereInfoPtr prewhere_info; const ExpressionActionsSettings actions_settings; diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index d8555d69788..c5799fab09f 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -21,35 +22,39 @@ MergedBlockOutputStream::MergedBlockOutputStream( const MergeTreeIndices & skip_indices, const Statistics & statistics, CompressionCodecPtr default_codec_, - const MergeTreeTransactionPtr & txn, + TransactionID tid, bool reset_columns_, bool blocks_are_granules_size, const WriteSettings & write_settings_, const MergeTreeIndexGranularity & computed_index_granularity) - : IMergedBlockOutputStream(data_part, metadata_snapshot_, columns_list_, reset_columns_) + : IMergedBlockOutputStream(data_part->storage.getSettings(), 
data_part->getDataPartStoragePtr(), metadata_snapshot_, columns_list_, reset_columns_) , columns_list(columns_list_) , default_codec(default_codec_) , write_settings(write_settings_) { MergeTreeWriterSettings writer_settings( - storage.getContext()->getSettings(), + data_part->storage.getContext()->getSettings(), write_settings, - storage.getSettings(), + storage_settings, data_part->index_granularity_info.mark_type.adaptive, /* rewrite_primary_key = */ true, blocks_are_granules_size); + /// TODO: looks like isStoredOnDisk() is always true for MergeTreeDataPart if (data_part->isStoredOnDisk()) data_part_storage->createDirectories(); - /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. - TransactionID tid = txn ? txn->tid : Tx::PrehistoricTID; /// NOTE do not pass context for writing to system.transactions_info_log, /// because part may have temporary name (with temporary block numbers). Will write it later. data_part->version.setCreationTID(tid, nullptr); data_part->storeVersionMetadata(); - writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, statistics, default_codec, writer_settings, computed_index_granularity); + writer = createMergeTreeDataPartWriter(data_part->getType(), + data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), + data_part_storage, data_part->index_granularity_info, + storage_settings, + columns_list, data_part->getColumnPositions(), metadata_snapshot, data_part->storage.getVirtualsPtr(), + skip_indices, statistics, data_part->getMarksFileExtension(), default_codec, writer_settings, computed_index_granularity); } /// If data is pre-sorted. @@ -208,7 +213,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (new_part->isProjectionPart()) { - if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) + if (new_part->storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) { auto count_out = new_part->getDataPartStorage().writeFile("count.txt", 4096, write_settings); HashingWriteBuffer count_out_hashing(*count_out); @@ -234,14 +239,16 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis written_files.emplace_back(std::move(out)); } - if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + if (new_part->storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - if (auto file = new_part->partition.store(storage, new_part->getDataPartStorage(), checksums)) + if (auto file = new_part->partition.store( + new_part->storage.getInMemoryMetadataPtr(), new_part->storage.getContext(), + new_part->getDataPartStorage(), checksums)) written_files.emplace_back(std::move(file)); if (new_part->minmax_idx->initialized) { - auto files = new_part->minmax_idx->store(storage, new_part->getDataPartStorage(), checksums); + auto files = new_part->minmax_idx->store(new_part->storage, new_part->getDataPartStorage(), checksums); for (auto & file : files) written_files.emplace_back(std::move(file)); } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 540b3b3bffa..c1e3d75fefc 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -22,7 +22,7 @@ public: const MergeTreeIndices & skip_indices, const 
Statistics & statistics, CompressionCodecPtr default_codec_, - const MergeTreeTransactionPtr & txn, + TransactionID tid, bool reset_columns_ = false, bool blocks_are_granules_size = false, const WriteSettings & write_settings = {}, diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 728b2e38833..674a9bd498f 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -20,11 +20,10 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( WrittenOffsetColumns * offset_columns_, const MergeTreeIndexGranularity & index_granularity, const MergeTreeIndexGranularityInfo * index_granularity_info) - : IMergedBlockOutputStream(data_part, metadata_snapshot_, header_.getNamesAndTypesList(), /*reset_columns=*/ true) + : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, header_.getNamesAndTypesList(), /*reset_columns=*/ true) , header(header_) { const auto & global_settings = data_part->storage.getContext()->getSettings(); - const auto & storage_settings = data_part->storage.getSettings(); MergeTreeWriterSettings writer_settings( global_settings, @@ -33,11 +32,18 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( index_granularity_info ? index_granularity_info->mark_type.adaptive : data_part->storage.canUseAdaptiveGranularity(), /* rewrite_primary_key = */ false); - writer = data_part->getWriter( + writer = createMergeTreeDataPartWriter( + data_part->getType(), + data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), + data_part_storage, data_part->index_granularity_info, + storage_settings, header.getNamesAndTypesList(), + data_part->getColumnPositions(), metadata_snapshot_, + data_part->storage.getVirtualsPtr(), indices_to_recalc, stats_to_recalc_, + data_part->getMarksFileExtension(), default_codec, writer_settings, index_granularity); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 5934756fb95..43238c5bcbc 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -60,6 +61,21 @@ static bool checkOperationIsNotCanceled(ActionBlocker & merges_blocker, MergeLis return true; } +static bool haveMutationsOfDynamicColumns(const MergeTreeData::DataPartPtr & data_part, const MutationCommands & commands) +{ + for (const auto & command : commands) + { + if (!command.column_name.empty()) + { + auto column = data_part->tryGetColumn(command.column_name); + if (column && column->type->hasDynamicSubcolumns()) + return true; + } + } + + return false; +} + static UInt64 getExistingRowsCount(const Block & block) { auto column = block.getByName(RowExistsColumn::name).column; @@ -95,7 +111,7 @@ static void splitAndModifyMutationCommands( auto part_columns = part->getColumnsDescription(); const auto & table_columns = metadata_snapshot->getColumns(); - if (!isWidePart(part) || !isFullPartStorage(part->getDataPartStorage())) + if (haveMutationsOfDynamicColumns(part, commands) || !isWidePart(part) || !isFullPartStorage(part->getDataPartStorage())) { NameSet mutated_columns; NameSet dropped_columns; @@ -1660,7 +1676,7 @@ private: skip_indices, stats_to_rewrite, ctx->compression_codec, - ctx->txn, + ctx->txn ? 
ctx->txn->tid : Tx::PrehistoricTID, /*reset_columns=*/ true, /*blocks_are_granules_size=*/ false, ctx->context->getWriteSettings(), @@ -2249,7 +2265,9 @@ bool MutateTask::prepare() /// All columns from part are changed and may be some more that were missing before in part /// TODO We can materialize compact part without copying data - if (!isWidePart(ctx->source_part) || !isFullPartStorage(ctx->source_part->getDataPartStorage()) + /// Also currently mutations of types with dynamic subcolumns in Wide part are possible only by + /// rewriting the whole part. + if (MutationHelpers::haveMutationsOfDynamicColumns(ctx->source_part, ctx->commands_for_part) || !isWidePart(ctx->source_part) || !isFullPartStorage(ctx->source_part->getDataPartStorage()) || (ctx->interpreter && ctx->interpreter->isAffectingAllColumns())) { /// In case of replicated merge tree with zero copy replication diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index ca8ed9abdb5..a94508ad41f 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -87,6 +87,7 @@ public: bool supportsPrewhere() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } bool supportsSubcolumns() const override { return true; } diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 5d05ef8ebf2..525960d5314 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -219,7 +219,7 @@ static IMergeTreeDataPart::Checksums checkDataPart( auto file_name = *stream_name + ".bin"; checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); - }); + }, column.type, data_part->getColumnSample(column)); } } else diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 4244ccccfe0..d234103e52b 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -14,7 +14,6 @@ #include #include -#include #include #include @@ -32,7 +31,6 @@ namespace ErrorCodes extern const int UNKNOWN_STORAGE; extern const int NO_REPLICA_NAME_GIVEN; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int DATA_TYPE_CANNOT_BE_USED_IN_KEY; } @@ -113,16 +111,6 @@ static ColumnsDescription getColumnsDescriptionFromZookeeper(const String & raw_ return ColumnsDescription::parse(zookeeper->get(fs::path(zookeeper_path) / "columns", &columns_stat)); } -static void verifySortingKey(const KeyDescription & sorting_key) -{ - /// Aggregate functions already forbidden, but SimpleAggregateFunction are not - for (const auto & data_type : sorting_key.data_types) - { - if (dynamic_cast(data_type->getCustomName())) - throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_KEY, "Column with type {} is not allowed in key expression", data_type->getCustomName()->getName()); - } -} - /// Returns whether a new syntax is used to define a table engine, i.e. MergeTree() PRIMARY KEY ... PARTITION BY ... SETTINGS ... /// instead of MergeTree(MergeTree(date, [sample_key], primary_key). static bool isExtendedStorageDef(const ASTCreateQuery & query) @@ -678,8 +666,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// column if sorting key will be changed. 
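For illustration, a simplified sketch of the gating idea behind the `args.mode <= LoadingStrictnessLevel::CREATE` checks below: a suspicious sorting key should be rejected when a table is created, but attaching or restoring an already existing table must still load. The enum and function here are hypothetical stand-ins, not ClickHouse's actual LoadingStrictnessLevel definition.

#include <stdexcept>
#include <string>

/// Simplified stand-in: lower values mean a stricter context (freshly created tables).
enum class StrictnessLevel { Create = 0, Attach = 1, ForceRestore = 2 };

/// Reject a suspicious sorting key only on CREATE; existing tables being
/// attached or restored must keep whatever key is already on disk.
void validateKeyOnCreate(StrictnessLevel mode, bool key_is_suspicious, const std::string & key_name)
{
    if (mode > StrictnessLevel::Create)
        return;
    if (key_is_suspicious)
        throw std::invalid_argument("Suspicious sorting key not allowed in CREATE: " + key_name);
}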
metadata.sorting_key = KeyDescription::getSortingKeyFromAST( args.storage_def->order_by->ptr(), metadata.columns, context, merging_param_key_arg); - if (!local_settings.allow_suspicious_primary_key) - verifySortingKey(metadata.sorting_key); + if (!local_settings.allow_suspicious_primary_key && args.mode <= LoadingStrictnessLevel::CREATE) + MergeTreeData::verifySortingKey(metadata.sorting_key); /// If primary key explicitly defined, than get it from AST if (args.storage_def->primary_key) @@ -792,8 +780,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// column if sorting key will be changed. metadata.sorting_key = KeyDescription::getSortingKeyFromAST(engine_args[arg_num], metadata.columns, context, merging_param_key_arg); - if (!local_settings.allow_suspicious_primary_key) - verifySortingKey(metadata.sorting_key); + if (!local_settings.allow_suspicious_primary_key && args.mode <= LoadingStrictnessLevel::CREATE) + MergeTreeData::verifySortingKey(metadata.sorting_key); /// In old syntax primary_key always equals to sorting key. metadata.primary_key = KeyDescription::getKeyFromAST(engine_args[arg_num], metadata.columns, context); diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp new file mode 100644 index 00000000000..ada3e2e9323 --- /dev/null +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -0,0 +1,551 @@ +#include + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +const std::unordered_set required_configuration_keys = { + "blob_path", + "container", +}; + +const std::unordered_set optional_configuration_keys = { + "format", + "compression", + "structure", + "compression_method", + "account_name", + "account_key", + "connection_string", + "storage_account_url", +}; + +using AzureClient = Azure::Storage::Blobs::BlobContainerClient; +using AzureClientPtr = std::unique_ptr; + +namespace +{ + bool isConnectionString(const std::string & candidate) + { + return !candidate.starts_with("http"); + } + + template + bool containerExists(T & blob_service_client, const std::string & container_name) + { + Azure::Storage::Blobs::ListBlobContainersOptions options; + options.Prefix = container_name; + options.PageSizeHint = 1; + + auto containers_list_response = blob_service_client.ListBlobContainers(options); + auto containers_list = containers_list_response.BlobContainers; + + auto it = std::find_if( + containers_list.begin(), containers_list.end(), + [&](const auto & c) { return c.Name == container_name; }); + return it != containers_list.end(); + } +} + +Poco::URI StorageAzureConfiguration::getConnectionURL() const +{ + if (!is_connection_string) + return Poco::URI(connection_url); + + auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); + return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); +} + +void StorageAzureConfiguration::check(ContextPtr context) const +{ + context->getGlobalContext()->getRemoteHostFilter().checkURL(getConnectionURL()); + Configuration::check(context); +} + +StorageAzureConfiguration::StorageAzureConfiguration(const StorageAzureConfiguration & other) + : Configuration(other) +{ + connection_url = other.connection_url; + is_connection_string = other.is_connection_string; + 
account_name = other.account_name; + account_key = other.account_key; + container = other.container; + blob_path = other.blob_path; + blobs_paths = other.blobs_paths; +} + +AzureObjectStorage::SettingsPtr StorageAzureConfiguration::createSettings(ContextPtr context) +{ + const auto & context_settings = context->getSettingsRef(); + auto settings_ptr = std::make_unique(); + settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; + settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; + settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); + settings_ptr->strict_upload_part_size = context_settings.azure_strict_upload_part_size; + settings_ptr->max_upload_part_size = context_settings.azure_max_upload_part_size; + settings_ptr->max_blocks_in_multipart_upload = context_settings.azure_max_blocks_in_multipart_upload; + settings_ptr->min_upload_part_size = context_settings.azure_min_upload_part_size; + return settings_ptr; +} + +StorageObjectStorage::QuerySettings StorageAzureConfiguration::getQuerySettings(const ContextPtr & context) const +{ + const auto & settings = context->getSettingsRef(); + return StorageObjectStorage::QuerySettings{ + .truncate_on_insert = settings.azure_truncate_on_insert, + .create_new_file_on_insert = settings.azure_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_azure, + .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.azure_skip_empty_files, + .list_object_keys_size = settings.azure_list_object_keys_size, + .throw_on_zero_files_match = settings.azure_throw_on_zero_files_match, + .ignore_non_existent_file = settings.azure_ignore_file_doesnt_exist, + }; +} + +ObjectStoragePtr StorageAzureConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT +{ + assertInitialized(); + auto client = createClient(is_readonly, /* attempt_to_create_container */true); + auto settings = createSettings(context); + return std::make_unique( + "AzureBlobStorage", std::move(client), std::move(settings), container, getConnectionURL().toString()); +} + +AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool attempt_to_create_container) +{ + using namespace Azure::Storage::Blobs; + + AzureClientPtr result; + + if (is_connection_string) + { + auto managed_identity_credential = std::make_shared(); + auto blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); + result = std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_url, container)); + + if (attempt_to_create_container) + { + bool container_exists = containerExists(*blob_service_client, container); + if (!container_exists) + { + if (is_read_only) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "AzureBlobStorage container does not exist '{}'", + container); + + try + { + result->CreateIfNotExists(); + } + catch (const Azure::Storage::StorageException & e) + { + if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.")) + { + throw; + } + } + } + } + } + else + { + std::shared_ptr storage_shared_key_credential; + if (account_name.has_value() && account_key.has_value()) + { + storage_shared_key_credential + = std::make_shared(*account_name, *account_key); + } + + std::unique_ptr blob_service_client; + std::shared_ptr managed_identity_credential; + if 
(storage_shared_key_credential) + { + blob_service_client = std::make_unique(connection_url, storage_shared_key_credential); + } + else + { + managed_identity_credential = std::make_shared(); + blob_service_client = std::make_unique(connection_url, managed_identity_credential); + } + + std::string final_url; + size_t pos = connection_url.find('?'); + if (pos != std::string::npos) + { + auto url_without_sas = connection_url.substr(0, pos); + final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + container + + connection_url.substr(pos); + } + else + final_url + = connection_url + (connection_url.back() == '/' ? "" : "/") + container; + + if (!attempt_to_create_container) + { + if (storage_shared_key_credential) + return std::make_unique(final_url, storage_shared_key_credential); + else + return std::make_unique(final_url, managed_identity_credential); + } + + bool container_exists = containerExists(*blob_service_client, container); + if (container_exists) + { + if (storage_shared_key_credential) + result = std::make_unique(final_url, storage_shared_key_credential); + else + result = std::make_unique(final_url, managed_identity_credential); + } + else + { + if (is_read_only) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "AzureBlobStorage container does not exist '{}'", + container); + try + { + result = std::make_unique(blob_service_client->CreateBlobContainer(container).Value); + } catch (const Azure::Storage::StorageException & e) + { + if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.") + { + if (storage_shared_key_credential) + result = std::make_unique(final_url, storage_shared_key_credential); + else + result = std::make_unique(final_url, managed_identity_credential); + } + else + { + throw; + } + } + } + } + + return result; +} + +void StorageAzureConfiguration::fromNamedCollection(const NamedCollection & collection) +{ + validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + + if (collection.has("connection_string")) + { + connection_url = collection.get("connection_string"); + is_connection_string = true; + } + + if (collection.has("storage_account_url")) + { + connection_url = collection.get("storage_account_url"); + is_connection_string = false; + } + + container = collection.get("container"); + blob_path = collection.get("blob_path"); + + if (collection.has("account_name")) + account_name = collection.get("account_name"); + + if (collection.has("account_key")) + account_key = collection.get("account_key"); + + structure = collection.getOrDefault("structure", "auto"); + format = collection.getOrDefault("format", format); + compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); + + blobs_paths = {blob_path}; +} + +void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, bool with_structure) +{ + if (engine_args.size() < 3 || engine_args.size() > (with_structure ? 
8 : 7)) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage AzureBlobStorage requires 3 to 7 arguments: " + "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, " + "[account_name, account_key, format, compression, structure)])"); + } + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context); + + std::unordered_map engine_args_to_idx; + + connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + is_connection_string = isConnectionString(connection_url); + + container = checkAndGetLiteralArgument(engine_args[1], "container"); + blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); + + auto is_format_arg = [] (const std::string & s) -> bool + { + return s == "auto" || FormatFactory::instance().getAllFormats().contains(Poco::toLower(s)); + }; + + if (engine_args.size() == 4) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + format = fourth_arg; + } + else + { + if (with_structure) + structure = fourth_arg; + else + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Unknown format or account name specified without account key: {}", fourth_arg); + } + } + else if (engine_args.size() == 5) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + format = fourth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + } + else + { + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + } + } + else if (engine_args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + if (with_structure) + { + format = fourth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + structure = checkAndGetLiteralArgument(engine_args[5], "structure"); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); + } + else + { + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); + if (is_format_arg(sixth_arg)) + format = sixth_arg; + else + { + if (with_structure) + structure = sixth_arg; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + } + } + } + else if (engine_args.size() == 7) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (!with_structure && is_format_arg(fourth_arg)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); + } + else + { + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); + if (!is_format_arg(sixth_arg)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + format = sixth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); + } + } + else if (with_structure && engine_args.size() == 8) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], 
"account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); + if (!is_format_arg(sixth_arg)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + format = sixth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); + structure = checkAndGetLiteralArgument(engine_args[7], "structure"); + } + + blobs_paths = {blob_path}; +} + +void StorageAzureConfiguration::addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override existed structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + if (args.size() < 3 || args.size() > 8) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Azure requires 3 to 7 arguments: " + "StorageObjectStorage(connection_string|storage_account_url, container_name, " + "blobpath, [account_name, account_key, format, compression, structure])"); + } + + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + + auto structure_literal = std::make_shared(structure_); + auto format_literal = std::make_shared(format_); + auto is_format_arg + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + + /// (connection_string, container_name, blobpath) + if (args.size() == 3) + { + args.push_back(format_literal); + /// Add compression = "auto" before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// (connection_string, container_name, blobpath, structure) or + /// (connection_string, container_name, blobpath, format) + /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. + else if (args.size() == 4) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); + /// (..., format) -> (..., format, compression, structure) + if (is_format_arg(fourth_arg)) + { + if (fourth_arg == "auto") + args[3] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// (..., structure) -> (..., format, compression, structure) + else + { + auto structure_arg = args.back(); + args[3] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + if (fourth_arg == "auto") + args.push_back(structure_literal); + else + args.push_back(structure_arg); + } + } + /// (connection_string, container_name, blobpath, format, compression) or + /// (storage_account_url, container_name, blobpath, account_name, account_key) + /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. 
+ else if (args.size() == 5) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + /// (..., format, compression) -> (..., format, compression, structure) + if (is_format_arg(fourth_arg)) + { + if (fourth_arg == "auto") + args[3] = format_literal; + args.push_back(structure_literal); + } + /// (..., account_name, account_key) -> (..., account_name, account_key, format, compression, structure) + else + { + args.push_back(format_literal); + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + } + /// (connection_string, container_name, blobpath, format, compression, structure) or + /// (storage_account_url, container_name, blobpath, account_name, account_key, structure) or + /// (storage_account_url, container_name, blobpath, account_name, account_key, format) + else if (args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + auto sixth_arg = checkAndGetLiteralArgument(args[5], "format/structure"); + + /// (..., format, compression, structure) + if (is_format_arg(fourth_arg)) + { + if (fourth_arg == "auto") + args[3] = format_literal; + if (checkAndGetLiteralArgument(args[5], "structure") == "auto") + args[5] = structure_literal; + } + /// (..., account_name, account_key, format) -> (..., account_name, account_key, format, compression, structure) + else if (is_format_arg(sixth_arg)) + { + if (sixth_arg == "auto") + args[5] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// (..., account_name, account_key, structure) -> (..., account_name, account_key, format, compression, structure) + else + { + auto structure_arg = args.back(); + args[5] = format_literal; + /// Add compression=auto before structure argument. 
+ args.push_back(std::make_shared("auto")); + if (sixth_arg == "auto") + args.push_back(structure_literal); + else + args.push_back(structure_arg); + } + } + /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression) + else if (args.size() == 7) + { + /// (..., format, compression) -> (..., format, compression, structure) + if (checkAndGetLiteralArgument(args[5], "format") == "auto") + args[5] = format_literal; + args.push_back(structure_literal); + } + /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) + else if (args.size() == 8) + { + if (checkAndGetLiteralArgument(args[5], "format") == "auto") + args[5] = format_literal; + if (checkAndGetLiteralArgument(args[7], "structure") == "auto") + args[7] = structure_literal; + } + } +} + +} + +#endif diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h new file mode 100644 index 00000000000..35b19079ca9 --- /dev/null +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -0,0 +1,77 @@ +#pragma once + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE +#include +#include +#include + +namespace DB +{ +class BackupFactory; + +class StorageAzureConfiguration : public StorageObjectStorage::Configuration +{ + friend class BackupReaderAzureBlobStorage; + friend class BackupWriterAzureBlobStorage; + friend void registerBackupEngineAzureBlobStorage(BackupFactory & factory); + +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + static constexpr auto type_name = "azure"; + static constexpr auto engine_name = "Azure"; + + StorageAzureConfiguration() = default; + StorageAzureConfiguration(const StorageAzureConfiguration & other); + + std::string getTypeName() const override { return type_name; } + std::string getEngineName() const override { return engine_name; } + + Path getPath() const override { return blob_path; } + void setPath(const Path & path) override { blob_path = path; } + + const Paths & getPaths() const override { return blobs_paths; } + void setPaths(const Paths & paths) override { blobs_paths = paths; } + + String getNamespace() const override { return container; } + String getDataSourceDescription() const override { return std::filesystem::path(connection_url) / container; } + StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; + + void check(ContextPtr context) const override; + ConfigurationPtr clone() override { return std::make_shared(*this); } + + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) override; + + void addStructureAndFormatToArgs( + ASTs & args, + const String & structure_, + const String & format_, + ContextPtr context) override; + +protected: + void fromNamedCollection(const NamedCollection & collection) override; + void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; + + using AzureClient = Azure::Storage::Blobs::BlobContainerClient; + using AzureClientPtr = std::unique_ptr; + + std::string connection_url; + bool is_connection_string; + + std::optional account_name; + std::optional account_key; + + std::string container; + std::string blob_path; + std::vector blobs_paths; + + AzureClientPtr createClient(bool is_read_only, bool attempt_to_create_container); + AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); + Poco::URI getConnectionURL() const; +}; + +} + +#endif diff --git a/src/Storages/ObjectStorage/DataLakes/Common.cpp 
b/src/Storages/ObjectStorage/DataLakes/Common.cpp new file mode 100644 index 00000000000..4830cc52a90 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/Common.cpp @@ -0,0 +1,28 @@ +#include "Common.h" +#include +#include +#include + +namespace DB +{ + +std::vector listFiles( + const IObjectStorage & object_storage, + const StorageObjectStorage::Configuration & configuration, + const String & prefix, const String & suffix) +{ + auto key = std::filesystem::path(configuration.getPath()) / prefix; + RelativePathsWithMetadata files_with_metadata; + object_storage.listObjects(key, files_with_metadata, 0); + Strings res; + for (const auto & file_with_metadata : files_with_metadata) + { + const auto & filename = file_with_metadata->relative_path; + if (filename.ends_with(suffix)) + res.push_back(filename); + } + LOG_TRACE(getLogger("DataLakeCommon"), "Listed {} files ({})", res.size(), fmt::join(res, ", ")); + return res; +} + +} diff --git a/src/Storages/ObjectStorage/DataLakes/Common.h b/src/Storages/ObjectStorage/DataLakes/Common.h new file mode 100644 index 00000000000..db3afa9e4a6 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/Common.h @@ -0,0 +1,15 @@ +#pragma once +#include +#include + +namespace DB +{ + +class IObjectStorage; + +std::vector listFiles( + const IObjectStorage & object_storage, + const StorageObjectStorage::Configuration & configuration, + const String & prefix, const String & suffix); + +} diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp similarity index 78% rename from src/Storages/DataLakes/DeltaLakeMetadataParser.cpp rename to src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 1687a4754f5..277d07d88ef 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -1,11 +1,9 @@ -#include +#include #include #include "config.h" #include #if USE_AWS_S3 && USE_PARQUET -#include -#include #include #include #include @@ -13,13 +11,12 @@ #include #include #include +#include +#include #include #include #include -#include -#include - -namespace fs = std::filesystem; +#include namespace DB { @@ -30,13 +27,24 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -template -struct DeltaLakeMetadataParser::Impl +struct DeltaLakeMetadata::Impl { + ObjectStoragePtr object_storage; + ConfigurationPtr configuration; + ContextPtr context; + /** * Useful links: * - https://github.com/delta-io/delta/blob/master/PROTOCOL.md#data-files */ + Impl(ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_) + : object_storage(object_storage_) + , configuration(configuration_) + , context(context_) + { + } /** * DeltaLake tables store metadata files and data files. @@ -66,10 +74,10 @@ struct DeltaLakeMetadataParser::Impl * An action changes one aspect of the table's state, for example, adding or removing a file. * Note: it is not a valid json, but a list of json's, so we read it in a while cycle. 
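As a rough standalone illustration of the replay semantics described in the comment above (not the parser in this file): each action in the log either adds or removes a data file path, and the set left after replaying the whole log is the list of files to read. The Action struct is a simplified stand-in for the parsed JSON entries.

#include <set>
#include <string>
#include <vector>

struct Action
{
    enum class Kind { Add, Remove } kind;
    std::string path;   /// value of json["add"]["path"] or json["remove"]["path"]
};

/// Replay the metadata log: later actions override earlier ones, so a file
/// that was added and then removed does not appear in the result.
std::set<std::string> replay(const std::vector<Action> & log, const std::string & table_path)
{
    std::set<std::string> files;
    for (const auto & action : log)
    {
        if (action.kind == Action::Kind::Add)
            files.insert(table_path + "/" + action.path);
        else
            files.erase(table_path + "/" + action.path);
    }
    return files;
}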
*/ - std::set processMetadataFiles(const Configuration & configuration, ContextPtr context) + std::set processMetadataFiles() { std::set result_files; - const auto checkpoint_version = getCheckpointIfExists(result_files, configuration, context); + const auto checkpoint_version = getCheckpointIfExists(result_files); if (checkpoint_version) { @@ -77,12 +85,12 @@ struct DeltaLakeMetadataParser::Impl while (true) { const auto filename = withPadding(++current_version) + metadata_file_suffix; - const auto file_path = fs::path(configuration.getPath()) / deltalake_metadata_directory / filename; + const auto file_path = std::filesystem::path(configuration->getPath()) / deltalake_metadata_directory / filename; - if (!MetadataReadHelper::exists(file_path, configuration)) + if (!object_storage->exists(StoredObject(file_path))) break; - processMetadataFile(file_path, result_files, configuration, context); + processMetadataFile(file_path, result_files); } LOG_TRACE( @@ -91,11 +99,9 @@ struct DeltaLakeMetadataParser::Impl } else { - const auto keys = MetadataReadHelper::listFiles( - configuration, deltalake_metadata_directory, metadata_file_suffix); - + const auto keys = listFiles(*object_storage, *configuration, deltalake_metadata_directory, metadata_file_suffix); for (const String & key : keys) - processMetadataFile(key, result_files, configuration, context); + processMetadataFile(key, result_files); } return result_files; @@ -130,13 +136,10 @@ struct DeltaLakeMetadataParser::Impl * \"nullCount\":{\"col-6c990940-59bb-4709-8f2e-17083a82c01a\":0,\"col-763cd7e2-7627-4d8e-9fb7-9e85d0c8845b\":0}}"}} * " */ - void processMetadataFile( - const String & key, - std::set & result, - const Configuration & configuration, - ContextPtr context) + void processMetadataFile(const String & key, std::set & result) const { - auto buf = MetadataReadHelper::createReadBuffer(key, context, configuration); + auto read_settings = context->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(key), read_settings); char c; while (!buf->eof()) @@ -158,12 +161,12 @@ struct DeltaLakeMetadataParser::Impl if (json.has("add")) { const auto path = json["add"]["path"].getString(); - result.insert(fs::path(configuration.getPath()) / path); + result.insert(std::filesystem::path(configuration->getPath()) / path); } else if (json.has("remove")) { const auto path = json["remove"]["path"].getString(); - result.erase(fs::path(configuration.getPath()) / path); + result.erase(std::filesystem::path(configuration->getPath()) / path); } } } @@ -181,14 +184,15 @@ struct DeltaLakeMetadataParser::Impl * * We need to get "version", which is the version of the checkpoint we need to read. 
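For illustration, a simplified standalone version of the withPadding helper used above and of how a version number maps onto _delta_log file names; the 20-digit zero padding follows the Delta commit-file naming convention (e.g. 00000000000000000061.json). This is a sketch, not the code in this file.

#include <cstdint>
#include <iomanip>
#include <sstream>
#include <string>

/// Zero-pad a table version to the fixed-width form used in _delta_log file names.
std::string withPadding(std::uint64_t version)
{
    std::ostringstream out;
    out << std::setw(20) << std::setfill('0') << version;
    return out.str();
}

/// Example: for version 61 the commit file is "_delta_log/00000000000000000061.json"
/// and a checkpoint, if present, is "_delta_log/00000000000000000061.checkpoint.parquet".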
*/ - size_t readLastCheckpointIfExists(const Configuration & configuration, ContextPtr context) + size_t readLastCheckpointIfExists() const { - const auto last_checkpoint_file = fs::path(configuration.getPath()) / deltalake_metadata_directory / "_last_checkpoint"; - if (!MetadataReadHelper::exists(last_checkpoint_file, configuration)) + const auto last_checkpoint_file = std::filesystem::path(configuration->getPath()) / deltalake_metadata_directory / "_last_checkpoint"; + if (!object_storage->exists(StoredObject(last_checkpoint_file))) return 0; String json_str; - auto buf = MetadataReadHelper::createReadBuffer(last_checkpoint_file, context, configuration); + auto read_settings = context->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(last_checkpoint_file), read_settings); readJSONObjectPossiblyInvalid(json_str, *buf); const JSON json(json_str); @@ -238,18 +242,19 @@ struct DeltaLakeMetadataParser::Impl throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arrow error: {}", _s.ToString()); \ } while (false) - size_t getCheckpointIfExists(std::set & result, const Configuration & configuration, ContextPtr context) + size_t getCheckpointIfExists(std::set & result) { - const auto version = readLastCheckpointIfExists(configuration, context); + const auto version = readLastCheckpointIfExists(); if (!version) return 0; const auto checkpoint_filename = withPadding(version) + ".checkpoint.parquet"; - const auto checkpoint_path = fs::path(configuration.getPath()) / deltalake_metadata_directory / checkpoint_filename; + const auto checkpoint_path = std::filesystem::path(configuration->getPath()) / deltalake_metadata_directory / checkpoint_filename; LOG_TRACE(log, "Using checkpoint file: {}", checkpoint_path.string()); - auto buf = MetadataReadHelper::createReadBuffer(checkpoint_path, context, configuration); + auto read_settings = context->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(checkpoint_path), read_settings); auto format_settings = getFormatSettings(context); /// Force nullable, because this parquet file for some reason does not have nullable @@ -306,7 +311,7 @@ struct DeltaLakeMetadataParser::Impl if (filename.empty()) continue; LOG_TEST(log, "Adding {}", filename); - const auto [_, inserted] = result.insert(fs::path(configuration.getPath()) / filename); + const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / filename); if (!inserted) throw Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", filename); } @@ -317,22 +322,24 @@ struct DeltaLakeMetadataParser::Impl LoggerPtr log = getLogger("DeltaLakeMetadataParser"); }; - -template -DeltaLakeMetadataParser::DeltaLakeMetadataParser() : impl(std::make_unique()) +DeltaLakeMetadata::DeltaLakeMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_) + : impl(std::make_unique(object_storage_, configuration_, context_)) { } -template -Strings DeltaLakeMetadataParser::getFiles(const Configuration & configuration, ContextPtr context) +Strings DeltaLakeMetadata::getDataFiles() const { - auto result = impl->processMetadataFiles(configuration, context); - return Strings(result.begin(), result.end()); + if (!data_files.empty()) + return data_files; + + auto result = impl->processMetadataFiles(); + data_files = Strings(result.begin(), result.end()); + return data_files; } -template DeltaLakeMetadataParser::DeltaLakeMetadataParser(); -template Strings DeltaLakeMetadataParser::getFiles( - const StorageS3::Configuration & 
configuration, ContextPtr); } #endif diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h new file mode 100644 index 00000000000..e527721b29e --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class DeltaLakeMetadata final : public IDataLakeMetadata +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + static constexpr auto name = "DeltaLake"; + + DeltaLakeMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_); + + Strings getDataFiles() const override; + + NamesAndTypesList getTableSchema() const override { return {}; } + + bool operator ==(const IDataLakeMetadata & other) const override + { + const auto * deltalake_metadata = dynamic_cast(&other); + return deltalake_metadata + && !data_files.empty() && !deltalake_metadata->data_files.empty() + && data_files == deltalake_metadata->data_files; + } + + static DataLakeMetadataPtr create( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + ContextPtr local_context) + { + return std::make_unique(object_storage, configuration, local_context); + } + +private: + struct Impl; + const std::shared_ptr impl; + mutable Strings data_files; +}; + +} diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.cpp new file mode 100644 index 00000000000..91a586ccbf9 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.cpp @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include +#include +#include "config.h" +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/** + * Useful links: + * - https://hudi.apache.org/tech-specs/ + * - https://hudi.apache.org/docs/file_layouts/ + */ + +/** + * Hudi tables store metadata files and data files. + * Metadata files are stored in .hoodie/metadata directory. Though unlike DeltaLake and Iceberg, + * metadata is not required in order to understand which files we need to read, moreover, + * for Hudi metadata does not always exist. + * + * There can be two types of data files + * 1. base files (columnar file formats like Apache Parquet/Orc) + * 2. log files + * Currently we support reading only `base files`. + * Data file name format: + * [File Id]_[File Write Token]_[Transaction timestamp].[File Extension] + * + * To find needed parts we need to find out latest part file for every file group for every partition. + * Explanation why: + * Hudi reads in and overwrites the entire table/partition with each update. + * Hudi controls the number of file groups under a single partition according to the + * hoodie.parquet.max.file.size option. Once a single Parquet file is too large, Hudi creates a second file group. + * Each file group is identified by File Id. 
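+ *
+ * A made-up example: if a partition directory contains
+ *   fileid1_wtoken1_100.parquet
+ *   fileid1_wtoken2_200.parquet
+ *   fileid2_wtoken1_100.parquet
+ * then only fileid1_wtoken2_200.parquet and fileid2_wtoken1_100.parquet are returned, because
+ * within file group fileid1 the file with the larger transaction timestamp (200) supersedes
+ * the older one. This is what getDataFilesImpl() below implements.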
+ */ +Strings HudiMetadata::getDataFilesImpl() const +{ + auto log = getLogger("HudiMetadata"); + const auto keys = listFiles(*object_storage, *configuration, "", Poco::toLower(configuration->format)); + + using Partition = std::string; + using FileID = std::string; + struct FileInfo + { + String key; + UInt64 timestamp = 0; + }; + std::unordered_map> files; + + for (const auto & key : keys) + { + auto key_file = std::filesystem::path(key); + Strings file_parts; + const String stem = key_file.stem(); + splitInto<'_'>(file_parts, stem); + if (file_parts.size() != 3) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format for file: {}", key); + + const auto partition = key_file.parent_path().stem(); + const auto & file_id = file_parts[0]; + const auto timestamp = parse(file_parts[2]); + + auto & file_info = files[partition][file_id]; + if (file_info.timestamp == 0 || file_info.timestamp < timestamp) + { + file_info.key = key; + file_info.timestamp = timestamp; + } + } + + Strings result; + for (auto & [partition, partition_data] : files) + { + LOG_TRACE(log, "Adding {} data files from partition {}", partition, partition_data.size()); + for (auto & [file_id, file_data] : partition_data) + result.push_back(std::move(file_data.key)); + } + return result; +} + +HudiMetadata::HudiMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_) + : WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) +{ +} + +Strings HudiMetadata::getDataFiles() const +{ + if (data_files.empty()) + data_files = getDataFilesImpl(); + return data_files; +} + +} diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h new file mode 100644 index 00000000000..3ab274b1fbf --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class HudiMetadata final : public IDataLakeMetadata, private WithContext +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + static constexpr auto name = "Hudi"; + + HudiMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_); + + Strings getDataFiles() const override; + + NamesAndTypesList getTableSchema() const override { return {}; } + + bool operator ==(const IDataLakeMetadata & other) const override + { + const auto * hudi_metadata = dynamic_cast(&other); + return hudi_metadata + && !data_files.empty() && !hudi_metadata->data_files.empty() + && data_files == hudi_metadata->data_files; + } + + static DataLakeMetadataPtr create( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + ContextPtr local_context) + { + return std::make_unique(object_storage, configuration, local_context); + } + +private: + const ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; + mutable Strings data_files; + + Strings getDataFilesImpl() const; +}; + +} diff --git a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h new file mode 100644 index 00000000000..a2bd5adb947 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h @@ -0,0 +1,19 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class IDataLakeMetadata : boost::noncopyable +{ +public: + virtual ~IDataLakeMetadata() = default; + virtual Strings getDataFiles() const = 
0; + virtual NamesAndTypesList getTableSchema() const = 0; + virtual bool operator==(const IDataLakeMetadata & other) const = 0; +}; +using DataLakeMetadataPtr = std::unique_ptr; + +} diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h new file mode 100644 index 00000000000..83865c47eb8 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -0,0 +1,139 @@ +#pragma once + +#include "config.h" + +#if USE_AWS_S3 && USE_AVRO + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +/// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/) +/// Right now it's implemented on top of StorageS3 and right now it doesn't support +/// many Iceberg features like schema evolution, partitioning, positional and equality deletes. +template +class IStorageDataLake final : public StorageObjectStorage +{ +public: + using Storage = StorageObjectStorage; + using ConfigurationPtr = Storage::ConfigurationPtr; + + static StoragePtr create( + ConfigurationPtr base_configuration, + ContextPtr context, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment_, + std::optional format_settings_, + LoadingStrictnessLevel mode) + { + auto object_storage = base_configuration->createObjectStorage(context, /* is_readonly */true); + DataLakeMetadataPtr metadata; + NamesAndTypesList schema_from_metadata; + + if (base_configuration->format == "auto") + base_configuration->format = "Parquet"; + + ConfigurationPtr configuration = base_configuration->clone(); + + try + { + metadata = DataLakeMetadata::create(object_storage, base_configuration, context); + schema_from_metadata = metadata->getTableSchema(); + configuration->setPaths(metadata->getDataFiles()); + } + catch (...) + { + if (mode <= LoadingStrictnessLevel::CREATE) + throw; + + metadata.reset(); + configuration->setPaths({}); + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + return std::make_shared>( + base_configuration, std::move(metadata), configuration, object_storage, + context, table_id_, + columns_.empty() ? 
ColumnsDescription(schema_from_metadata) : columns_, + constraints_, comment_, format_settings_); + } + + String getName() const override { return DataLakeMetadata::name; } + + static ColumnsDescription getTableStructureFromData( + ObjectStoragePtr object_storage_, + ConfigurationPtr base_configuration, + const std::optional & format_settings_, + ContextPtr local_context) + { + auto metadata = DataLakeMetadata::create(object_storage_, base_configuration, local_context); + + auto schema_from_metadata = metadata->getTableSchema(); + if (schema_from_metadata != NamesAndTypesList{}) + { + return ColumnsDescription(std::move(schema_from_metadata)); + } + else + { + ConfigurationPtr configuration = base_configuration->clone(); + configuration->setPaths(metadata->getDataFiles()); + return Storage::resolveSchemaFromData( + object_storage_, configuration, format_settings_, local_context); + } + } + + void updateConfiguration(ContextPtr local_context) override + { + Storage::updateConfiguration(local_context); + + auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); + + if (current_metadata && *current_metadata == *new_metadata) + return; + + current_metadata = std::move(new_metadata); + auto updated_configuration = base_configuration->clone(); + updated_configuration->setPaths(current_metadata->getDataFiles()); + + Storage::configuration = updated_configuration; + } + + template + IStorageDataLake( + ConfigurationPtr base_configuration_, + DataLakeMetadataPtr metadata_, + Args &&... args) + : Storage(std::forward(args)...) + , base_configuration(base_configuration_) + , current_metadata(std::move(metadata_)) + { + if (base_configuration->format == "auto") + { + base_configuration->format = Storage::configuration->format; + } + } + +private: + ConfigurationPtr base_configuration; + DataLakeMetadataPtr current_metadata; +}; + +using StorageIceberg = IStorageDataLake; +using StorageDeltaLake = IStorageDataLake; +using StorageHudi = IStorageDataLake; + +} + +#endif diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp similarity index 93% rename from src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp rename to src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp index a50fc2972df..0484f86542c 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp @@ -21,11 +21,11 @@ #include #include #include +#include #include #include -#include -#include -#include +#include +#include #include #include @@ -45,7 +45,8 @@ extern const int UNSUPPORTED_METHOD; } IcebergMetadata::IcebergMetadata( - const StorageS3::Configuration & configuration_, + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, DB::ContextPtr context_, Int32 metadata_version_, Int32 format_version_, @@ -53,6 +54,7 @@ IcebergMetadata::IcebergMetadata( Int32 current_schema_id_, DB::NamesAndTypesList schema_) : WithContext(context_) + , object_storage(object_storage_) , configuration(configuration_) , metadata_version(metadata_version_) , format_version(format_version_) @@ -338,15 +340,17 @@ MutableColumns parseAvro( * 1) v.metadata.json, where V - metadata version. 
* 2) -.metadata.json, where V - metadata version */ -std::pair getMetadataFileAndVersion(const StorageS3::Configuration & configuration) +std::pair getMetadataFileAndVersion( + ObjectStoragePtr object_storage, + const StorageObjectStorage::Configuration & configuration) { - const auto metadata_files = S3DataLakeMetadataReadHelper::listFiles(configuration, "metadata", ".metadata.json"); + const auto metadata_files = listFiles(*object_storage, configuration, "metadata", ".metadata.json"); if (metadata_files.empty()) { throw Exception( ErrorCodes::FILE_DOESNT_EXIST, "The metadata file for Iceberg table with path {} doesn't exist", - configuration.url.key); + configuration.getPath()); } std::vector> metadata_files_with_versions; @@ -373,11 +377,15 @@ std::pair getMetadataFileAndVersion(const StorageS3::Configuratio } -std::unique_ptr parseIcebergMetadata(const StorageS3::Configuration & configuration, ContextPtr context_) +DataLakeMetadataPtr IcebergMetadata::create( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + ContextPtr local_context) { - const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(configuration); + const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(object_storage, *configuration); LOG_DEBUG(getLogger("IcebergMetadata"), "Parse metadata {}", metadata_file_path); - auto buf = S3DataLakeMetadataReadHelper::createReadBuffer(metadata_file_path, context_, configuration); + auto read_settings = local_context->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(metadata_file_path), read_settings); String json_str; readJSONObjectPossiblyInvalid(json_str, *buf); @@ -386,7 +394,7 @@ std::unique_ptr parseIcebergMetadata(const StorageS3::Configura Poco::JSON::Object::Ptr object = json.extract(); auto format_version = object->getValue("format-version"); - auto [schema, schema_id] = parseTableSchema(object, format_version, context_->getSettingsRef().iceberg_engine_ignore_schema_evolution); + auto [schema, schema_id] = parseTableSchema(object, format_version, local_context->getSettingsRef().iceberg_engine_ignore_schema_evolution); auto current_snapshot_id = object->getValue("current-snapshot-id"); auto snapshots = object->get("snapshots").extract(); @@ -398,12 +406,12 @@ std::unique_ptr parseIcebergMetadata(const StorageS3::Configura if (snapshot->getValue("snapshot-id") == current_snapshot_id) { const auto path = snapshot->getValue("manifest-list"); - manifest_list_file = std::filesystem::path(configuration.url.key) / "metadata" / std::filesystem::path(path).filename(); + manifest_list_file = std::filesystem::path(configuration->getPath()) / "metadata" / std::filesystem::path(path).filename(); break; } } - return std::make_unique(configuration, context_, metadata_version, format_version, manifest_list_file, schema_id, schema); + return std::make_unique(object_storage, configuration, local_context, metadata_version, format_version, manifest_list_file, schema_id, schema); } /** @@ -431,7 +439,7 @@ std::unique_ptr parseIcebergMetadata(const StorageS3::Configura * │ 1 │ 2252246380142525104 │ ('/iceberg_data/db/table_name/data/a=2/00000-1-c9535a00-2f4f-405c-bcfa-6d4f9f477235-00003.parquet','PARQUET',(2),1,631,67108864,[(1,46),(2,48)],[(1,1),(2,1)],[(1,0),(2,0)],[],[(1,'\0\0\0\0\0\0\0'),(2,'3')],[(1,'\0\0\0\0\0\0\0'),(2,'3')],NULL,[4],0) │ * 
└────────┴─────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ */ -Strings IcebergMetadata::getDataFiles() +Strings IcebergMetadata::getDataFiles() const { if (!data_files.empty()) return data_files; @@ -442,12 +450,14 @@ Strings IcebergMetadata::getDataFiles() LOG_TEST(log, "Collect manifest files from manifest list {}", manifest_list_file); - auto manifest_list_buf = S3DataLakeMetadataReadHelper::createReadBuffer(manifest_list_file, getContext(), configuration); + auto context = getContext(); + auto read_settings = context->getReadSettings(); + auto manifest_list_buf = object_storage->readObject(StoredObject(manifest_list_file), read_settings); auto manifest_list_file_reader = std::make_unique(std::make_unique(*manifest_list_buf)); auto data_type = AvroSchemaReader::avroNodeToDataType(manifest_list_file_reader->dataSchema().root()->leafAt(0)); Block header{{data_type->createColumn(), data_type, "manifest_path"}}; - auto columns = parseAvro(*manifest_list_file_reader, header, getFormatSettings(getContext())); + auto columns = parseAvro(*manifest_list_file_reader, header, getFormatSettings(context)); auto & col = columns.at(0); if (col->getDataType() != TypeIndex::String) @@ -463,7 +473,7 @@ Strings IcebergMetadata::getDataFiles() { const auto file_path = col_str->getDataAt(i).toView(); const auto filename = std::filesystem::path(file_path).filename(); - manifest_files.emplace_back(std::filesystem::path(configuration.url.key) / "metadata" / filename); + manifest_files.emplace_back(std::filesystem::path(configuration->getPath()) / "metadata" / filename); } NameSet files; @@ -472,7 +482,7 @@ Strings IcebergMetadata::getDataFiles() { LOG_TEST(log, "Process manifest file {}", manifest_file); - auto buffer = S3DataLakeMetadataReadHelper::createReadBuffer(manifest_file, getContext(), configuration); + auto buffer = object_storage->readObject(StoredObject(manifest_file), read_settings); auto manifest_file_reader = std::make_unique(std::make_unique(*buffer)); /// Manifest file should always have table schema in avro file metadata. 
By now we don't support tables with evolved schema, @@ -483,7 +493,7 @@ Strings IcebergMetadata::getDataFiles() Poco::JSON::Parser parser; Poco::Dynamic::Var json = parser.parse(schema_json_string); Poco::JSON::Object::Ptr schema_object = json.extract(); - if (!getContext()->getSettingsRef().iceberg_engine_ignore_schema_evolution && schema_object->getValue("schema-id") != current_schema_id) + if (!context->getSettingsRef().iceberg_engine_ignore_schema_evolution && schema_object->getValue("schema-id") != current_schema_id) throw Exception( ErrorCodes::UNSUPPORTED_METHOD, "Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not " @@ -596,9 +606,9 @@ Strings IcebergMetadata::getDataFiles() const auto status = status_int_column->getInt(i); const auto data_path = std::string(file_path_string_column->getDataAt(i).toView()); - const auto pos = data_path.find(configuration.url.key); + const auto pos = data_path.find(configuration->getPath()); if (pos == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected to find {} in data path: {}", configuration.url.key, data_path); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected to find {} in data path: {}", configuration->getPath(), data_path); const auto file_path = data_path.substr(pos); diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h similarity index 69% rename from src/Storages/DataLakes/Iceberg/IcebergMetadata.h rename to src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h index 3e6a2ec3415..06dbd373bf9 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h @@ -2,9 +2,11 @@ #if USE_AWS_S3 && USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. -#include #include #include +#include +#include +#include namespace DB { @@ -56,40 +58,55 @@ namespace DB * "metadata-log" : [ ] * } */ -class IcebergMetadata : WithContext +class IcebergMetadata : public IDataLakeMetadata, private WithContext { public: - IcebergMetadata(const StorageS3::Configuration & configuration_, - ContextPtr context_, - Int32 metadata_version_, - Int32 format_version_, - String manifest_list_file_, - Int32 current_schema_id_, - NamesAndTypesList schema_); + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + static constexpr auto name = "Iceberg"; + + IcebergMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_, + Int32 metadata_version_, + Int32 format_version_, + String manifest_list_file_, + Int32 current_schema_id_, + NamesAndTypesList schema_); /// Get data files. On first request it reads manifest_list file and iterates through manifest files to find all data files. /// All subsequent calls will return saved list of files (because it cannot be changed without changing metadata file) - Strings getDataFiles(); + Strings getDataFiles() const override; /// Get table schema parsed from metadata. 
- NamesAndTypesList getTableSchema() const { return schema; } + NamesAndTypesList getTableSchema() const override { return schema; } - size_t getVersion() const { return metadata_version; } + bool operator ==(const IDataLakeMetadata & other) const override + { + const auto * iceberg_metadata = dynamic_cast(&other); + return iceberg_metadata && getVersion() == iceberg_metadata->getVersion(); + } + + static DataLakeMetadataPtr create( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + ContextPtr local_context); private: - const StorageS3::Configuration configuration; + size_t getVersion() const { return metadata_version; } + + const ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; Int32 metadata_version; Int32 format_version; String manifest_list_file; Int32 current_schema_id; NamesAndTypesList schema; - Strings data_files; + mutable Strings data_files; LoggerPtr log; - }; -std::unique_ptr parseIcebergMetadata(const StorageS3::Configuration & configuration, ContextPtr context); - } #endif diff --git a/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp new file mode 100644 index 00000000000..0fa6402e892 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp @@ -0,0 +1,82 @@ +#include "config.h" + +#if USE_AWS_S3 + +#include +#include +#include +#include + + +namespace DB +{ + +#if USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. + +void registerStorageIceberg(StorageFactory & factory) +{ + factory.registerStorage( + "Iceberg", + [&](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + + return StorageIceberg::create( + configuration, args.getContext(), args.table_id, args.columns, + args.constraints, args.comment, std::nullopt, args.mode); + }, + { + .supports_settings = false, + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} + +#endif + +#if USE_PARQUET +void registerStorageDeltaLake(StorageFactory & factory) +{ + factory.registerStorage( + "DeltaLake", + [&](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + + return StorageDeltaLake::create( + configuration, args.getContext(), args.table_id, args.columns, + args.constraints, args.comment, std::nullopt, args.mode); + }, + { + .supports_settings = false, + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} +#endif + +void registerStorageHudi(StorageFactory & factory) +{ + factory.registerStorage( + "Hudi", + [&](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + + return StorageHudi::create( + configuration, args.getContext(), args.table_id, args.columns, + args.constraints, args.comment, std::nullopt, args.mode); + }, + { + .supports_settings = false, + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} + +} + +#endif diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp similarity index 99% rename from 
src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp rename to src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp index 6b6151f5474..21df7e35284 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp @@ -1,9 +1,9 @@ #include "AsynchronousReadBufferFromHDFS.h" #if USE_HDFS +#include "ReadBufferFromHDFS.h" #include #include -#include #include #include diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h similarity index 96% rename from src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h rename to src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h index 10e2749fd4a..5aef92315a4 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h +++ b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp new file mode 100644 index 00000000000..a8a9ab5b557 --- /dev/null +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -0,0 +1,217 @@ +#include + +#if USE_HDFS +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguration & other) + : Configuration(other) +{ + url = other.url; + path = other.path; + paths = other.paths; +} + +void StorageHDFSConfiguration::check(ContextPtr context) const +{ + context->getRemoteHostFilter().checkURL(Poco::URI(url)); + checkHDFSURL(fs::path(url) / path.substr(1)); + Configuration::check(context); +} + +ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage( /// NOLINT + ContextPtr context, + bool /* is_readonly */) +{ + assertInitialized(); + const auto & settings = context->getSettingsRef(); + auto hdfs_settings = std::make_unique( + settings.remote_read_min_bytes_for_seek, + settings.hdfs_replication + ); + return std::make_shared( + url, std::move(hdfs_settings), context->getConfigRef(), /* lazy_initialize */true); +} + +std::string StorageHDFSConfiguration::getPathWithoutGlobs() const +{ + /// Unlike s3 and azure, which are object storages, + /// hdfs is a filesystem, so it cannot list files by partual prefix, + /// only by directory. + auto first_glob_pos = path.find_first_of("*?{"); + auto end_of_path_without_globs = path.substr(0, first_glob_pos).rfind('/'); + if (end_of_path_without_globs == std::string::npos || end_of_path_without_globs == 0) + return "/"; + return path.substr(0, end_of_path_without_globs); +} +StorageObjectStorage::QuerySettings StorageHDFSConfiguration::getQuerySettings(const ContextPtr & context) const +{ + const auto & settings = context->getSettingsRef(); + return StorageObjectStorage::QuerySettings{ + .truncate_on_insert = settings.hdfs_truncate_on_insert, + .create_new_file_on_insert = settings.hdfs_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs, + .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.hdfs_skip_empty_files, + .list_object_keys_size = 0, /// HDFS does not support listing in batches. 
+ .throw_on_zero_files_match = settings.hdfs_throw_on_zero_files_match, + .ignore_non_existent_file = settings.hdfs_ignore_file_doesnt_exist, + }; +} + +void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool with_structure) +{ + const size_t max_args_num = with_structure ? 4 : 3; + if (args.empty() || args.size() > max_args_num) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Expected not more than {} arguments", max_args_num); + } + + std::string url_str; + url_str = checkAndGetLiteralArgument(args[0], "url"); + + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + + if (args.size() > 1) + { + format = checkAndGetLiteralArgument(args[1], "format_name"); + } + + if (with_structure) + { + if (args.size() > 2) + { + structure = checkAndGetLiteralArgument(args[2], "structure"); + } + if (args.size() > 3) + { + compression_method = checkAndGetLiteralArgument(args[3], "compression_method"); + } + } + else if (args.size() > 2) + { + compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); + } + + setURL(url_str); +} + +void StorageHDFSConfiguration::fromNamedCollection(const NamedCollection & collection) +{ + std::string url_str; + + auto filename = collection.getOrDefault("filename", ""); + if (!filename.empty()) + url_str = std::filesystem::path(collection.get("url")) / filename; + else + url_str = collection.get("url"); + + format = collection.getOrDefault("format", "auto"); + compression_method = collection.getOrDefault("compression_method", + collection.getOrDefault("compression", "auto")); + structure = collection.getOrDefault("structure", "auto"); + + setURL(url_str); +} + +void StorageHDFSConfiguration::setURL(const std::string & url_) +{ + auto pos = url_.find("//"); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}", url_); + + pos = url_.find('/', pos + 2); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}", url_); + + path = url_.substr(pos + 1); + if (!path.starts_with('/')) + path = '/' + path; + + url = url_.substr(0, pos); + paths = {path}; + + LOG_TRACE(getLogger("StorageHDFSConfiguration"), "Using url: {}, path: {}", url, path); +} + +void StorageHDFSConfiguration::addStructureAndFormatToArgs( + ASTs & args, + const String & structure_, + const String & format_, + ContextPtr context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override existed structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + size_t count = args.size(); + if (count == 0 || count > 4) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Expected 1 to 4 arguments in table function, got {}", count); + } + + auto format_literal = std::make_shared(format_); + auto structure_literal = std::make_shared(structure_); + + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + + /// hdfs(url) + if (count == 1) + { + /// Add format=auto before structure argument. 
+ args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// hdfs(url, format) + else if (count == 2) + { + if (checkAndGetLiteralArgument(args[1], "format") == "auto") + args.back() = format_literal; + args.push_back(structure_literal); + } + /// hdfs(url, format, structure) + /// hdfs(url, format, structure, compression_method) + else if (count >= 3) + { + if (checkAndGetLiteralArgument(args[1], "format") == "auto") + args[1] = format_literal; + if (checkAndGetLiteralArgument(args[2], "structure") == "auto") + args[2] = structure_literal; + } + } +} + +} + +#endif diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h new file mode 100644 index 00000000000..01a8b9c5e3b --- /dev/null +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -0,0 +1,60 @@ +#pragma once +#include "config.h" + +#if USE_HDFS +#include +#include +#include + +namespace DB +{ + +class StorageHDFSConfiguration : public StorageObjectStorage::Configuration +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + static constexpr auto type_name = "hdfs"; + static constexpr auto engine_name = "HDFS"; + + StorageHDFSConfiguration() = default; + StorageHDFSConfiguration(const StorageHDFSConfiguration & other); + + std::string getTypeName() const override { return type_name; } + std::string getEngineName() const override { return engine_name; } + + Path getPath() const override { return path; } + void setPath(const Path & path_) override { path = path_; } + + const Paths & getPaths() const override { return paths; } + void setPaths(const Paths & paths_) override { paths = paths_; } + std::string getPathWithoutGlobs() const override; + + String getNamespace() const override { return ""; } + String getDataSourceDescription() const override { return url; } + StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; + + void check(ContextPtr context) const override; + ConfigurationPtr clone() override { return std::make_shared(*this); } + + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) override; + + void addStructureAndFormatToArgs( + ASTs & args, + const String & structure_, + const String & format_, + ContextPtr context) override; + +private: + void fromNamedCollection(const NamedCollection &) override; + void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override; + void setURL(const std::string & url_); + + String url; + String path; + std::vector paths; +}; + +} + +#endif diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/ObjectStorage/HDFS/HDFSCommon.cpp similarity index 99% rename from src/Storages/HDFS/HDFSCommon.cpp rename to src/Storages/ObjectStorage/HDFS/HDFSCommon.cpp index 9eb0d10cc16..365828bcc40 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/ObjectStorage/HDFS/HDFSCommon.cpp @@ -1,4 +1,4 @@ -#include +#include "HDFSCommon.h" #include #include #include diff --git a/src/Storages/HDFS/HDFSCommon.h b/src/Storages/ObjectStorage/HDFS/HDFSCommon.h similarity index 100% rename from src/Storages/HDFS/HDFSCommon.h rename to src/Storages/ObjectStorage/HDFS/HDFSCommon.h diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp similarity index 98% rename from src/Storages/HDFS/ReadBufferFromHDFS.cpp rename to src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp index 4df05d47003..be339d021dc 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ 
b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp @@ -1,11 +1,12 @@ #include "ReadBufferFromHDFS.h" #if USE_HDFS -#include +#include "HDFSCommon.h" #include #include #include #include +#include #include #include @@ -55,10 +56,10 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory(use_external_buffer_ ? 0 : read_settings_.remote_fs_buffer_size) , hdfs_uri(hdfs_uri_) , hdfs_file_path(hdfs_file_path_) - , builder(createHDFSBuilder(hdfs_uri_, config_)) , read_settings(read_settings_) , read_until_position(read_until_position_) { + builder = createHDFSBuilder(hdfs_uri_, config_); fs = createHDFSFS(builder.get()); fin = hdfsOpenFile(fs.get(), hdfs_file_path.c_str(), O_RDONLY, 0, 0, 0); @@ -100,7 +101,9 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory {})", file_offset, read_until_position - 1); @@ -145,6 +148,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemoryadd(bytes_read, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); + return true; } diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h similarity index 100% rename from src/Storages/HDFS/ReadBufferFromHDFS.h rename to src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp similarity index 93% rename from src/Storages/HDFS/WriteBufferFromHDFS.cpp rename to src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp index 173dd899ada..2c14b38ce01 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp @@ -2,8 +2,8 @@ #if USE_HDFS -#include -#include +#include "WriteBufferFromHDFS.h" +#include "HDFSCommon.h" #include #include #include @@ -48,12 +48,13 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl const size_t begin_of_path = hdfs_uri.find('/', hdfs_uri.find("//") + 2); const String path = hdfs_uri.substr(begin_of_path); - fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, replication_, 0); /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here + /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here + fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, replication_, 0); if (fout == nullptr) { - throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Unable to open HDFS file: {} error: {}", - path, std::string(hdfsGetLastError())); + throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Unable to open HDFS file: {} ({}) error: {}", + path, hdfs_uri, std::string(hdfsGetLastError())); } } diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h similarity index 100% rename from src/Storages/HDFS/WriteBufferFromHDFS.h rename to src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp new file mode 100644 index 00000000000..78cdc442f64 --- /dev/null +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -0,0 +1,289 @@ +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int CANNOT_DETECT_FORMAT; +} + +ReadBufferIterator::ReadBufferIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const FileIterator & file_iterator_, + const std::optional & format_settings_, + 
SchemaCache & schema_cache_, + ObjectInfos & read_keys_, + const ContextPtr & context_) + : WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) + , file_iterator(file_iterator_) + , format_settings(format_settings_) + , query_settings(configuration->getQuerySettings(context_)) + , schema_cache(schema_cache_) + , read_keys(read_keys_) + , prev_read_keys_size(read_keys_.size()) +{ + if (configuration->format != "auto") + format = configuration->format; +} + +SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const ObjectInfo & object_info, const String & format_name) const +{ + auto source = StorageObjectStorageSource::getUniqueStoragePathIdentifier(*configuration, object_info); + return DB::getKeyForSchemaCache(source, format_name, format_settings, getContext()); +} + +SchemaCache::Keys ReadBufferIterator::getKeysForSchemaCache() const +{ + Strings sources; + sources.reserve(read_keys.size()); + std::transform( + read_keys.begin(), read_keys.end(), + std::back_inserter(sources), + [&](const auto & elem) + { + return StorageObjectStorageSource::getUniqueStoragePathIdentifier(*configuration, *elem); + }); + return DB::getKeysForSchemaCache(sources, *format, format_settings, getContext()); +} + +std::optional ReadBufferIterator::tryGetColumnsFromCache( + const ObjectInfos::iterator & begin, + const ObjectInfos::iterator & end) +{ + if (!query_settings.schema_inference_use_cache) + return std::nullopt; + + for (auto it = begin; it < end; ++it) + { + const auto & object_info = (*it); + auto get_last_mod_time = [&] -> std::optional + { + const auto & path = object_info->isArchive() ? object_info->getPathToArchive() : object_info->getPath(); + if (!object_info->metadata) + object_info->metadata = object_storage->tryGetObjectMetadata(path); + + return object_info->metadata + ? std::optional(object_info->metadata->last_modified.epochTime()) + : std::nullopt; + }; + + if (format) + { + const auto cache_key = getKeyForSchemaCache(*object_info, *format); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + const auto cache_key = getKeyForSchemaCache(*object_info, format_name); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + /// Now format is known. It should be the same for all files. 
+ format = format_name; + return columns; + } + } + } + } + return std::nullopt; +} + +void ReadBufferIterator::setNumRowsToLastFile(size_t num_rows) +{ + if (query_settings.schema_inference_use_cache) + schema_cache.addNumRows(getKeyForSchemaCache(*current_object_info, *format), num_rows); +} + +void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) +{ + if (query_settings.schema_inference_use_cache + && query_settings.schema_inference_mode == SchemaInferenceMode::UNION) + { + schema_cache.addColumns(getKeyForSchemaCache(*current_object_info, *format), columns); + } +} + +void ReadBufferIterator::setResultingSchema(const ColumnsDescription & columns) +{ + if (query_settings.schema_inference_use_cache + && query_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + schema_cache.addManyColumns(getKeysForSchemaCache(), columns); + } +} + +void ReadBufferIterator::setFormatName(const String & format_name) +{ + format = format_name; +} + +String ReadBufferIterator::getLastFileName() const +{ + if (current_object_info) + return current_object_info->getPath(); + else + return ""; +} + +std::unique_ptr ReadBufferIterator::recreateLastReadBuffer() +{ + auto context = getContext(); + + const auto & path = current_object_info->isArchive() ? current_object_info->getPathToArchive() : current_object_info->getPath(); + auto impl = object_storage->readObject(StoredObject(path), context->getReadSettings()); + + const auto compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method); + const auto zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); + + return wrapReadBufferWithCompressionMethod(std::move(impl), compression_method, zstd_window_log_max); +} + +ReadBufferIterator::Data ReadBufferIterator::next() +{ + if (first) + { + /// If format is unknown we iterate through all currently read keys on first iteration and + /// try to determine format by file name. + if (!format) + { + for (const auto & object_info : read_keys) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName())) + { + format = format_from_file_name; + break; + } + } + } + + /// For default mode check cached columns for currently read keys on first iteration. + if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) + { + return {nullptr, cached_columns, format}; + } + } + } + + while (true) + { + current_object_info = file_iterator->next(0); + + if (!current_object_info) + { + if (first) + { + if (format.has_value()) + { + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, " + "because there are no files with provided path " + "in {} or all files are empty. You can specify table structure manually", + *format, object_storage->getName()); + } + + throw Exception( + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, " + "because there are no files with provided path " + "in {} or all files are empty. 
You can specify the format manually", + object_storage->getName()); + } + + return {nullptr, std::nullopt, format}; + } + + const auto filename = current_object_info->getFileName(); + chassert(!filename.empty()); + + /// file iterator could get new keys after new iteration + if (read_keys.size() > prev_read_keys_size) + { + /// If format is unknown we can try to determine it by new file names. + if (!format) + { + for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName())) + { + format = format_from_file_name; + break; + } + } + } + + /// Check new files in schema cache if schema inference mode is default. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + if (columns_from_cache) + return {nullptr, columns_from_cache, format}; + } + + prev_read_keys_size = read_keys.size(); + } + + if (query_settings.skip_empty_files + && current_object_info->metadata && current_object_info->metadata->size_bytes == 0) + continue; + + /// In union mode, check cached columns only for current key. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) + { + ObjectInfos objects{current_object_info}; + if (auto columns_from_cache = tryGetColumnsFromCache(objects.begin(), objects.end())) + { + first = false; + return {nullptr, columns_from_cache, format}; + } + } + + std::unique_ptr read_buf; + CompressionMethod compression_method; + using ObjectInfoInArchive = StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive; + if (const auto * object_info_in_archive = dynamic_cast(current_object_info.get())) + { + compression_method = chooseCompressionMethod(filename, configuration->compression_method); + const auto & archive_reader = object_info_in_archive->archive_reader; + read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); + } + else + { + compression_method = chooseCompressionMethod(filename, configuration->compression_method); + read_buf = object_storage->readObject( + StoredObject(current_object_info->getPath()), + getContext()->getReadSettings(), + {}, + current_object_info->metadata->size_bytes); + } + + if (!query_settings.skip_empty_files || !read_buf->eof()) + { + first = false; + + read_buf = wrapReadBufferWithCompressionMethod( + std::move(read_buf), + compression_method, + static_cast(getContext()->getSettingsRef().zstd_window_log_max)); + + return {std::move(read_buf), std::nullopt, format}; + } + } +} +} diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h new file mode 100644 index 00000000000..6eeb52ec2ed --- /dev/null +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -0,0 +1,63 @@ +#pragma once +#include +#include +#include + + +namespace DB +{ + +class ReadBufferIterator : public IReadBufferIterator, WithContext +{ +public: + using FileIterator = std::shared_ptr; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + using ObjectInfoPtr = StorageObjectStorage::ObjectInfoPtr; + using ObjectInfo = StorageObjectStorage::ObjectInfo; + using ObjectInfos = StorageObjectStorage::ObjectInfos; + + ReadBufferIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const FileIterator & file_iterator_, + const 
std::optional & format_settings_, + SchemaCache & schema_cache_, + ObjectInfos & read_keys_, + const ContextPtr & context_); + + Data next() override; + + void setNumRowsToLastFile(size_t num_rows) override; + + void setSchemaToLastFile(const ColumnsDescription & columns) override; + + void setResultingSchema(const ColumnsDescription & columns) override; + + String getLastFileName() const override; + + void setFormatName(const String & format_name) override; + + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override; + +private: + SchemaCache::Key getKeyForSchemaCache(const ObjectInfo & object_info, const String & format_name) const; + SchemaCache::Keys getKeysForSchemaCache() const; + std::optional tryGetColumnsFromCache( + const ObjectInfos::iterator & begin, const ObjectInfos::iterator & end); + + ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; + const FileIterator file_iterator; + const std::optional & format_settings; + const StorageObjectStorage::QuerySettings query_settings; + SchemaCache & schema_cache; + ObjectInfos & read_keys; + std::optional format; + + size_t prev_read_keys_size; + ObjectInfoPtr current_object_info; + bool first = true; +}; +} diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp new file mode 100644 index 00000000000..4b217b94730 --- /dev/null +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -0,0 +1,472 @@ +#include + +#if USE_AWS_S3 +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int LOGICAL_ERROR; +} + +static const std::unordered_set required_configuration_keys = { + "url", +}; + +static const std::unordered_set optional_configuration_keys = { + "format", + "compression", + "compression_method", + "structure", + "access_key_id", + "secret_access_key", + "session_token", + "filename", + "use_environment_credentials", + "max_single_read_retries", + "min_upload_part_size", + "upload_part_size_multiply_factor", + "upload_part_size_multiply_parts_count_threshold", + "max_single_part_upload_size", + "max_connections", + "expiration_window_seconds", + "no_sign_request" +}; + +String StorageS3Configuration::getDataSourceDescription() const +{ + return std::filesystem::path(url.uri.getHost() + std::to_string(url.uri.getPort())) / url.bucket; +} + +std::string StorageS3Configuration::getPathInArchive() const +{ + if (url.archive_pattern.has_value()) + return url.archive_pattern.value(); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Path {} is not an archive", getPath()); +} + +void StorageS3Configuration::check(ContextPtr context) const +{ + validateNamespace(url.bucket); + context->getGlobalContext()->getRemoteHostFilter().checkURL(url.uri); + context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(headers_from_ast); + Configuration::check(context); +} + +void StorageS3Configuration::validateNamespace(const String & name) const +{ + S3::URI::validateBucket(name, {}); +} + +StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & other) + : Configuration(other) +{ + url = other.url; + static_configuration = other.static_configuration; + headers_from_ast = other.headers_from_ast; + keys = other.keys; +} + +StorageObjectStorage::QuerySettings StorageS3Configuration::getQuerySettings(const 
ContextPtr & context) const +{ + const auto & settings = context->getSettingsRef(); + return StorageObjectStorage::QuerySettings{ + .truncate_on_insert = settings.s3_truncate_on_insert, + .create_new_file_on_insert = settings.s3_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_s3, + .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.s3_skip_empty_files, + .list_object_keys_size = settings.s3_list_object_keys_size, + .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .ignore_non_existent_file = settings.s3_ignore_file_doesnt_exist, + }; +} + +ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT +{ + assertInitialized(); + + const auto & config = context->getConfigRef(); + const auto & settings = context->getSettingsRef(); + const std::string config_prefix = "s3."; + + auto s3_settings = getSettings(config, config_prefix, context, settings.s3_validate_request_settings); + + request_settings.updateFromSettingsIfChanged(settings); + auth_settings.updateFrom(s3_settings->auth_settings); + + s3_settings->auth_settings = auth_settings; + s3_settings->request_settings = request_settings; + + if (!headers_from_ast.empty()) + { + s3_settings->auth_settings.headers.insert( + s3_settings->auth_settings.headers.end(), + headers_from_ast.begin(), headers_from_ast.end()); + } + + if (auto endpoint_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName())) + s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); + + auto client = getClient(config, config_prefix, context, *s3_settings, false, &url); + auto key_generator = createObjectStorageKeysGeneratorAsIsWithPrefix(url.key); + auto s3_capabilities = S3Capabilities + { + .support_batch_delete = config.getBool("s3.support_batch_delete", true), + .support_proxy = config.getBool("s3.support_proxy", config.has("s3.proxy")), + }; + + return std::make_shared( + std::move(client), std::move(s3_settings), url, s3_capabilities, + key_generator, "StorageS3", false); +} + +void StorageS3Configuration::fromNamedCollection(const NamedCollection & collection) +{ + validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + + auto filename = collection.getOrDefault("filename", ""); + if (!filename.empty()) + url = S3::URI(std::filesystem::path(collection.get("url")) / filename); + else + url = S3::URI(collection.get("url")); + + auth_settings.access_key_id = collection.getOrDefault("access_key_id", ""); + auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", ""); + auth_settings.use_environment_credentials = collection.getOrDefault("use_environment_credentials", 1); + auth_settings.no_sign_request = collection.getOrDefault("no_sign_request", false); + auth_settings.expiration_window_seconds = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS); + + format = collection.getOrDefault("format", format); + compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); + structure = collection.getOrDefault("structure", "auto"); + + request_settings = S3Settings::RequestSettings(collection); + + static_configuration = !auth_settings.access_key_id.empty() || auth_settings.no_sign_request.has_value(); + + keys = {url.key}; +} + +void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool 
with_structure) +{ + /// Supported signatures: S3('url') S3('url', 'format') S3('url', 'format', 'compression') S3('url', NOSIGN) S3('url', NOSIGN, 'format') S3('url', NOSIGN, 'format', 'compression') S3('url', 'aws_access_key_id', 'aws_secret_access_key') S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token') S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format') S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression') + /// with optional headers() function + + size_t count = StorageURL::evalArgsAndCollectHeaders(args, headers_from_ast, context); + + if (count == 0 || count > (with_structure ? 7 : 6)) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage S3 requires 1 to 5 arguments: " + "url, [NOSIGN | access_key_id, secret_access_key], name of used format and [compression_method]"); + + std::unordered_map engine_args_to_idx; + bool no_sign_request = false; + + /// For 2 arguments we support 2 possible variants: + /// - s3(source, format) + /// - s3(source, NOSIGN) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. + if (count == 2) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + no_sign_request = true; + else + engine_args_to_idx = {{"format", 1}}; + } + /// For 3 arguments we support 2 possible variants: + /// - s3(source, format, compression_method) + /// - s3(source, access_key_id, secret_access_key) + /// - s3(source, NOSIGN, format) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or format name. + else if (count == 3) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + { + no_sign_request = true; + engine_args_to_idx = {{"format", 2}}; + } + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) + { + if (with_structure) + engine_args_to_idx = {{"format", 1}, {"structure", 2}}; + else + engine_args_to_idx = {{"format", 1}, {"compression_method", 2}}; + } + else + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; + } + /// For 4 arguments we support 3 possible variants: + /// if with_structure == 0: + /// - s3(source, access_key_id, secret_access_key, session_token) + /// - s3(source, access_key_id, secret_access_key, format) + /// - s3(source, NOSIGN, format, compression_method) + /// if with_structure == 1: + /// - s3(source, format, structure, compression_method), + /// - s3(source, access_key_id, secret_access_key, format), + /// - s3(source, access_key_id, secret_access_key, session_token) + /// - s3(source, NOSIGN, format, structure) + /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN or not. 
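As an aside for reviewers (not part of the patch): every arity branch in this parser applies the same rule to `args[1]`, namely NOSIGN beats format name beats access_key_id. A minimal standalone sketch of that rule, with `isKnownFormat` as a hypothetical stand-in for `FormatFactory::instance().exists()` and `iequalsAscii` for `boost::iequals`:

```cpp
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>

enum class SecondArgKind { NoSign, Format, AccessKeyId };

// Case-insensitive ASCII comparison, standing in for boost::iequals.
static bool iequalsAscii(const std::string & a, const std::string & b)
{
    return a.size() == b.size()
        && std::equal(a.begin(), a.end(), b.begin(), [](unsigned char x, unsigned char y)
                      { return std::tolower(x) == std::tolower(y); });
}

// Hypothetical stand-in for FormatFactory::instance().exists(name).
static bool isKnownFormat(const std::string & name)
{
    return name == "auto" || name == "CSV" || name == "Parquet" || name == "JSONEachRow";
}

// The rule applied to args[1] by the parser.
static SecondArgKind classifySecondArg(const std::string & arg)
{
    if (iequalsAscii(arg, "NOSIGN"))
        return SecondArgKind::NoSign;        // s3(url, NOSIGN, format, ...)
    if (isKnownFormat(arg))
        return SecondArgKind::Format;        // s3(url, format, ...)
    return SecondArgKind::AccessKeyId;       // s3(url, access_key_id, secret_access_key, ...)
}

int main()
{
    std::cout << int(classifySecondArg("nosign"))   << '\n';  // 0 -> NoSign
    std::cout << int(classifySecondArg("Parquet"))  << '\n';  // 1 -> Format
    std::cout << int(classifySecondArg("AKIAxxxx")) << '\n';  // 2 -> AccessKeyId
}
```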
+ else if (count == 4) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "access_key_id/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + { + no_sign_request = true; + if (with_structure) + engine_args_to_idx = {{"format", 2}, {"structure", 3}}; + else + engine_args_to_idx = {{"format", 2}, {"compression_method", 3}}; + } + else if (with_structure && (second_arg == "auto" || FormatFactory::instance().exists(second_arg))) + { + engine_args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; + } + else + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "session_token/format"); + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; + } + } + } + /// For 5 arguments we support 2 possible variants: + /// if with_structure == 0: + /// - s3(source, access_key_id, secret_access_key, session_token, format) + /// - s3(source, access_key_id, secret_access_key, format, compression) + /// if with_structure == 1: + /// - s3(source, access_key_id, secret_access_key, format, structure) + /// - s3(source, access_key_id, secret_access_key, session_token, format) + /// - s3(source, NOSIGN, format, structure, compression_method) + else if (count == 5) + { + if (with_structure) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "NOSIGN/access_key_id"); + if (boost::iequals(second_arg, "NOSIGN")) + { + no_sign_request = true; + engine_args_to_idx = {{"format", 2}, {"structure", 3}, {"compression_method", 4}}; + } + else + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; + } + } + } + else + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "session_token/format"); + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression_method", 4}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; + } + } + } + else if (count == 6) + { + if (with_structure) + { + /// - s3(source, access_key_id, secret_access_key, format, structure, compression_method) + /// - s3(source, access_key_id, secret_access_key, session_token, format, structure) + /// We can distinguish them by looking at the 4-th argument: check if it's a format name or not + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}}; + } + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}; + } + } + else if (with_structure && count == 7) + { + engine_args_to_idx = {{"access_key_id", 1}, 
{"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}; + } + + /// This argument is always the first + url = S3::URI(checkAndGetLiteralArgument(args[0], "url")); + + if (engine_args_to_idx.contains("format")) + { + format = checkAndGetLiteralArgument(args[engine_args_to_idx["format"]], "format"); + /// Set format to configuration only of it's not 'auto', + /// because we can have default format set in configuration. + if (format != "auto") + format = format; + } + + if (engine_args_to_idx.contains("structure")) + structure = checkAndGetLiteralArgument(args[engine_args_to_idx["structure"]], "structure"); + + if (engine_args_to_idx.contains("compression_method")) + compression_method = checkAndGetLiteralArgument(args[engine_args_to_idx["compression_method"]], "compression_method"); + + if (engine_args_to_idx.contains("access_key_id")) + auth_settings.access_key_id = checkAndGetLiteralArgument(args[engine_args_to_idx["access_key_id"]], "access_key_id"); + + if (engine_args_to_idx.contains("secret_access_key")) + auth_settings.secret_access_key = checkAndGetLiteralArgument(args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); + + if (engine_args_to_idx.contains("session_token")) + auth_settings.session_token = checkAndGetLiteralArgument(args[engine_args_to_idx["session_token"]], "session_token"); + + if (no_sign_request) + auth_settings.no_sign_request = no_sign_request; + + static_configuration = !auth_settings.access_key_id.empty() || auth_settings.no_sign_request.has_value(); + auth_settings.no_sign_request = no_sign_request; + + keys = {url.key}; +} + +void StorageS3Configuration::addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override existed structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + HTTPHeaderEntries tmp_headers; + size_t count = StorageURL::evalArgsAndCollectHeaders(args, tmp_headers, context); + + if (count == 0 || count > 6) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to 6 arguments in table function, got {}", count); + + auto format_literal = std::make_shared(format_); + auto structure_literal = std::make_shared(structure_); + + /// s3(s3_url) + if (count == 1) + { + /// Add format=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// s3(s3_url, format) or s3(s3_url, NOSIGN) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. + else if (count == 2) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + /// If there is NOSIGN, add format=auto before structure. + if (boost::iequals(second_arg, "NOSIGN")) + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// s3(source, format, structure) or + /// s3(source, access_key_id, secret_access_key) or + /// s3(source, NOSIGN, format) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. 
+ else if (count == 3) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + { + args.push_back(structure_literal); + } + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) + { + args[count - 1] = structure_literal; + } + else + { + /// Add format=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + } + /// s3(source, format, structure, compression_method) or + /// s3(source, access_key_id, secret_access_key, format) or + /// s3(source, NOSIGN, format, structure) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. + else if (count == 4) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + { + args[count - 1] = structure_literal; + } + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) + { + args[count - 2] = structure_literal; + } + else + { + args.push_back(structure_literal); + } + } + /// s3(source, access_key_id, secret_access_key, format, structure) or + /// s3(source, NOSIGN, format, structure, compression_method) + /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not. + else if (count == 5) + { + auto sedond_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + if (boost::iequals(sedond_arg, "NOSIGN")) + { + args[count - 2] = structure_literal; + } + else + { + args[count - 1] = structure_literal; + } + } + /// s3(source, access_key_id, secret_access_key, format, structure, compression) + else if (count == 6) + { + args[count - 2] = structure_literal; + } + } +} + +} + +#endif diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h new file mode 100644 index 00000000000..906d10a1a9a --- /dev/null +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -0,0 +1,70 @@ +#pragma once + +#include "config.h" + +#if USE_AWS_S3 +#include +#include + +namespace DB +{ + +class StorageS3Configuration : public StorageObjectStorage::Configuration +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + static constexpr auto type_name = "s3"; + static constexpr auto namespace_name = "bucket"; + + StorageS3Configuration() = default; + StorageS3Configuration(const StorageS3Configuration & other); + + std::string getTypeName() const override { return type_name; } + std::string getEngineName() const override { return url.storage_name; } + std::string getNamespaceType() const override { return namespace_name; } + + Path getPath() const override { return url.key; } + void setPath(const Path & path) override { url.key = path; } + + const Paths & getPaths() const override { return keys; } + void setPaths(const Paths & paths) override { keys = paths; } + + String getNamespace() const override { return url.bucket; } + String getDataSourceDescription() const override; + StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; + + bool isArchive() const override { return url.archive_pattern.has_value(); } + std::string getPathInArchive() const override; + + void check(ContextPtr context) const override; + void validateNamespace(const String & name) const override; + ConfigurationPtr clone() override { return std::make_shared(*this); } + bool isStaticConfiguration() const override { return static_configuration; } + + 
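Illustrative sketch (not part of the patch) of the positional rewrite performed by `addStructureAndFormatToArgs` above for the one- to three-argument cases, modelled with plain strings instead of `ASTLiteral` nodes; the literal `"CSV"` check stands in for the real `FormatFactory` lookup:

```cpp
#include <cassert>
#include <string>
#include <vector>

static void addStructureToArgs(std::vector<std::string> & args, const std::string & structure)
{
    const size_t count = args.size();
    if (count == 1)                       // s3(url)
    {
        args.push_back("auto");           // format placeholder goes before structure
        args.push_back(structure);
    }
    else if (count == 2)                  // s3(url, format) or s3(url, NOSIGN)
    {
        if (args[1] == "NOSIGN")
            args.push_back("auto");
        args.push_back(structure);
    }
    else if (count == 3)                  // s3(url, format, structure) / s3(url, NOSIGN, format) / s3(url, key_id, secret)
    {
        if (args[1] == "NOSIGN")
            args.push_back(structure);
        else if (args[1] == "auto" || args[1] == "CSV")   // "is a format name" stand-in
            args[count - 1] = structure;                  // overwrite the existing structure slot
        else
        {
            args.push_back("auto");
            args.push_back(structure);
        }
    }
}

int main()
{
    std::vector<std::string> a{"https://bucket/data.csv"};
    addStructureToArgs(a, "x UInt64");
    assert((a == std::vector<std::string>{"https://bucket/data.csv", "auto", "x UInt64"}));

    std::vector<std::string> b{"https://bucket/data.csv", "NOSIGN"};
    addStructureToArgs(b, "x UInt64");
    assert((b == std::vector<std::string>{"https://bucket/data.csv", "NOSIGN", "auto", "x UInt64"}));
}
```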
ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) override; + + void addStructureAndFormatToArgs( + ASTs & args, + const String & structure, + const String & format, + ContextPtr context) override; + +private: + void fromNamedCollection(const NamedCollection & collection) override; + void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; + + S3::URI url; + std::vector keys; + + S3::AuthSettings auth_settings; + S3Settings::RequestSettings request_settings; + HTTPHeaderEntries headers_from_ast; /// Headers from ast is a part of static configuration. + /// If s3 configuration was passed from ast, then it is static. + /// If from config - it can be changed with config reload. + bool static_configuration = true; +}; + +} + +#endif diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp new file mode 100644 index 00000000000..2c8e60b49d0 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -0,0 +1,503 @@ +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int DATABASE_ACCESS_DENIED; + extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; +} + +StorageObjectStorage::StorageObjectStorage( + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + ContextPtr context, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + std::optional format_settings_, + bool distributed_processing_, + ASTPtr partition_by_) + : IStorage(table_id_) + , configuration(configuration_) + , object_storage(object_storage_) + , format_settings(format_settings_) + , partition_by(partition_by_) + , distributed_processing(distributed_processing_) + , log(getLogger(fmt::format("Storage{}({})", configuration->getEngineName(), table_id_.getFullTableName()))) +{ + ColumnsDescription columns{columns_}; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, context); + configuration->check(context); + + StorageInMemoryMetadata metadata; + metadata.setColumns(columns); + metadata.setConstraints(constraints_); + metadata.setComment(comment); + + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); + setInMemoryMetadata(metadata); +} + +String StorageObjectStorage::getName() const +{ + return configuration->getEngineName(); +} + +bool StorageObjectStorage::prefersLargeBlocks() const +{ + return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration->format); +} + +bool StorageObjectStorage::parallelizeOutputAfterReading(ContextPtr context) const +{ + return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration->format, context); +} + +bool StorageObjectStorage::supportsSubsetOfColumns(const ContextPtr & context) const +{ + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context, format_settings); +} + +void StorageObjectStorage::updateConfiguration(ContextPtr context) +{ + IObjectStorage::ApplyNewSettingsOptions options{ .allow_client_change = !configuration->isStaticConfiguration() }; + object_storage->applyNewSettings(context->getConfigRef(), configuration->getTypeName() + ".", context, options); +} + +namespace 
+{ +class ReadFromObjectStorageStep : public SourceStepWithFilter +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + ReadFromObjectStorageStep( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const String & name_, + const Names & columns_to_read, + const NamesAndTypesList & virtual_columns_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const std::optional & format_settings_, + bool distributed_processing_, + ReadFromFormatInfo info_, + const bool need_only_count_, + ContextPtr context_, + size_t max_block_size_, + size_t num_streams_) + : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_) + , object_storage(object_storage_) + , configuration(configuration_) + , info(std::move(info_)) + , virtual_columns(virtual_columns_) + , format_settings(format_settings_) + , name(name_ + "Source") + , need_only_count(need_only_count_) + , max_block_size(max_block_size_) + , num_streams(num_streams_) + , distributed_processing(distributed_processing_) + { + } + + std::string getName() const override { return name; } + + void applyFilters(ActionDAGNodes added_filter_nodes) override + { + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + createIterator(predicate); + } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override + { + createIterator(nullptr); + + Pipes pipes; + auto context = getContext(); + const size_t max_threads = context->getSettingsRef().max_threads; + size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount(); + + if (estimated_keys_count > 1) + num_streams = std::min(num_streams, estimated_keys_count); + else + { + /// The amount of keys (zero) was probably underestimated. + /// We will keep one stream for this particular case. + num_streams = 1; + } + + const size_t max_parsing_threads = num_streams >= max_threads ? 
1 : (max_threads / std::max(num_streams, 1ul)); + + for (size_t i = 0; i < num_streams; ++i) + { + auto source = std::make_shared( + getName(), object_storage, configuration, info, format_settings, + context, max_block_size, iterator_wrapper, max_parsing_threads, need_only_count); + + source->setKeyCondition(filter_actions_dag, context); + pipes.emplace_back(std::move(source)); + } + + auto pipe = Pipe::unitePipes(std::move(pipes)); + if (pipe.empty()) + pipe = Pipe(std::make_shared(info.source_header)); + + for (const auto & processor : pipe.getProcessors()) + processors.emplace_back(processor); + + pipeline.init(std::move(pipe)); + } + +private: + ObjectStoragePtr object_storage; + ConfigurationPtr configuration; + std::shared_ptr iterator_wrapper; + + const ReadFromFormatInfo info; + const NamesAndTypesList virtual_columns; + const std::optional format_settings; + const String name; + const bool need_only_count; + const size_t max_block_size; + size_t num_streams; + const bool distributed_processing; + + void createIterator(const ActionsDAG::Node * predicate) + { + if (iterator_wrapper) + return; + auto context = getContext(); + iterator_wrapper = StorageObjectStorageSource::createFileIterator( + configuration, object_storage, distributed_processing, + context, predicate, virtual_columns, nullptr, context->getFileProgressCallback()); + } +}; +} + +void StorageObjectStorage::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t num_streams) +{ + updateConfiguration(local_context); + if (partition_by && configuration->withPartitionWildcard()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from a partitioned {} storage is not implemented yet", + getName()); + } + + const auto read_from_format_info = prepareReadingFromFormat( + column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); + const bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) + && local_context->getSettingsRef().optimize_count_from_files; + + auto read_step = std::make_unique( + object_storage, + configuration, + getName(), + column_names, + getVirtualsList(), + query_info, + storage_snapshot, + format_settings, + distributed_processing, + read_from_format_info, + need_only_count, + local_context, + max_block_size, + num_streams); + + query_plan.addStep(std::move(read_step)); +} + +SinkToStoragePtr StorageObjectStorage::write( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr local_context, + bool /* async_insert */) +{ + updateConfiguration(local_context); + const auto sample_block = metadata_snapshot->getSampleBlock(); + const auto & settings = configuration->getQuerySettings(local_context); + + if (configuration->isArchive()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Path '{}' contains archive. 
Write into archive is not supported", + configuration->getPath()); + } + + if (configuration->withGlobsIgnorePartitionWildcard()) + { + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "Path '{}' contains globs, so the table is in readonly mode", + configuration->getPath()); + } + + if (configuration->withPartitionWildcard()) + { + ASTPtr partition_by_ast = nullptr; + if (auto insert_query = std::dynamic_pointer_cast(query)) + { + if (insert_query->partition_by) + partition_by_ast = insert_query->partition_by; + else + partition_by_ast = partition_by; + } + + if (partition_by_ast) + { + return std::make_shared( + object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast); + } + } + + auto paths = configuration->getPaths(); + if (auto new_key = checkAndGetNewFileOnInsertIfNeeded( + *object_storage, *configuration, settings, paths.front(), paths.size())) + { + paths.push_back(*new_key); + } + configuration->setPaths(paths); + + return std::make_shared( + object_storage, + configuration->clone(), + format_settings, + sample_block, + local_context); +} + +void StorageObjectStorage::truncate( + const ASTPtr & /* query */, + const StorageMetadataPtr & /* metadata_snapshot */, + ContextPtr /* context */, + TableExclusiveLockHolder & /* table_holder */) +{ + if (configuration->isArchive()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Path '{}' contains archive. Table cannot be truncated", + configuration->getPath()); + } + + if (configuration->withGlobs()) + { + throw Exception( + ErrorCodes::DATABASE_ACCESS_DENIED, + "{} key '{}' contains globs, so the table is in readonly mode and cannot be truncated", + getName(), configuration->getPath()); + } + + StoredObjects objects; + for (const auto & key : configuration->getPaths()) + objects.emplace_back(key); + + object_storage->removeObjectsIfExist(objects); +} + +std::unique_ptr StorageObjectStorage::createReadBufferIterator( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + ObjectInfos & read_keys, + const ContextPtr & context) +{ + auto file_iterator = StorageObjectStorageSource::createFileIterator( + configuration, + object_storage, + false/* distributed_processing */, + context, + {}/* predicate */, + {}/* virtual_columns */, + &read_keys); + + return std::make_unique( + object_storage, configuration, file_iterator, + format_settings, getSchemaCache(context, configuration->getTypeName()), read_keys, context); +} + +ColumnsDescription StorageObjectStorage::resolveSchemaFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context) +{ + ObjectInfos read_keys; + auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + return readSchemaFromFormat(configuration->format, format_settings, *iterator, context); +} + +std::string StorageObjectStorage::resolveFormatFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context) +{ + ObjectInfos read_keys; + auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + return detectFormatAndReadSchema(format_settings, *iterator, context).second; +} + +std::pair StorageObjectStorage::resolveSchemaAndFormatFromData( + const ObjectStoragePtr & object_storage, + const 
ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context) +{ + ObjectInfos read_keys; + auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + auto [columns, format] = detectFormatAndReadSchema(format_settings, *iterator, context); + configuration->format = format; + return std::pair(columns, format); +} + +SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context, const std::string & storage_type_name) +{ + if (storage_type_name == "s3") + { + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_s3", + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; + } + else if (storage_type_name == "hdfs") + { + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_hdfs", + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; + } + else if (storage_type_name == "azure") + { + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_azure", + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; + } + else + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported storage type: {}", storage_type_name); +} + +void StorageObjectStorage::Configuration::initialize( + Configuration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure) +{ + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + configuration.fromNamedCollection(*named_collection); + else + configuration.fromAST(engine_args, local_context, with_table_structure); + + if (configuration.format == "auto") + { + configuration.format = FormatFactory::instance().tryGetFormatFromFileName( + configuration.isArchive() + ? 
configuration.getPathInArchive() + : configuration.getPath()).value_or("auto"); + } + else + FormatFactory::instance().checkFormatName(configuration.format); + + configuration.initialized = true; +} + +void StorageObjectStorage::Configuration::check(ContextPtr) const +{ + FormatFactory::instance().checkFormatName(format); +} + +StorageObjectStorage::Configuration::Configuration(const Configuration & other) +{ + format = other.format; + compression_method = other.compression_method; + structure = other.structure; +} + +bool StorageObjectStorage::Configuration::withPartitionWildcard() const +{ + static const String PARTITION_ID_WILDCARD = "{_partition_id}"; + return getPath().find(PARTITION_ID_WILDCARD) != String::npos + || getNamespace().find(PARTITION_ID_WILDCARD) != String::npos; +} + +bool StorageObjectStorage::Configuration::withGlobsIgnorePartitionWildcard() const +{ + if (!withPartitionWildcard()) + return withGlobs(); + else + return PartitionedSink::replaceWildcards(getPath(), "").find_first_of("*?{") != std::string::npos; +} + +bool StorageObjectStorage::Configuration::isPathWithGlobs() const +{ + return getPath().find_first_of("*?{") != std::string::npos; +} + +bool StorageObjectStorage::Configuration::isNamespaceWithGlobs() const +{ + return getNamespace().find_first_of("*?{") != std::string::npos; +} + +std::string StorageObjectStorage::Configuration::getPathWithoutGlobs() const +{ + return getPath().substr(0, getPath().find_first_of("*?{")); +} + +bool StorageObjectStorage::Configuration::isPathInArchiveWithGlobs() const +{ + return getPathInArchive().find_first_of("*?{") != std::string::npos; +} + +std::string StorageObjectStorage::Configuration::getPathInArchive() const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Path {} is not archive", getPath()); +} + +void StorageObjectStorage::Configuration::assertInitialized() const +{ + if (!initialized) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Configuration was not initialized before usage"); + } +} +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h new file mode 100644 index 00000000000..f45d8c1f01a --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -0,0 +1,204 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class ReadBufferIterator; +class SchemaCache; +class NamedCollection; + +/** + * A general class containing implementation for external table engines + * such as StorageS3, StorageAzure, StorageHDFS. + * Works with an object of IObjectStorage class. 
+ */ +class StorageObjectStorage : public IStorage +{ +public: + class Configuration; + using ConfigurationPtr = std::shared_ptr; + using ObjectInfo = RelativePathWithMetadata; + using ObjectInfoPtr = std::shared_ptr; + using ObjectInfos = std::vector; + + struct QuerySettings + { + /// Insert settings: + bool truncate_on_insert; + bool create_new_file_on_insert; + + /// Schema inference settings: + bool schema_inference_use_cache; + SchemaInferenceMode schema_inference_mode; + + /// List settings: + bool skip_empty_files; + size_t list_object_keys_size; + bool throw_on_zero_files_match; + bool ignore_non_existent_file; + }; + + StorageObjectStorage( + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + ContextPtr context_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + std::optional format_settings_, + bool distributed_processing_ = false, + ASTPtr partition_by_ = nullptr); + + String getName() const override; + + void read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + SinkToStoragePtr write( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context, + bool async_insert) override; + + void truncate( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr local_context, + TableExclusiveLockHolder &) override; + + bool supportsPartitionBy() const override { return true; } + + bool supportsSubcolumns() const override { return true; } + + bool supportsDynamicSubcolumns() const override { return true; } + + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } + + bool supportsSubsetOfColumns(const ContextPtr & context) const; + + bool prefersLargeBlocks() const override; + + bool parallelizeOutputAfterReading(ContextPtr context) const override; + + static SchemaCache & getSchemaCache(const ContextPtr & context, const std::string & storage_type_name); + + static ColumnsDescription resolveSchemaFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context); + + static std::string resolveFormatFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context); + + static std::pair resolveSchemaAndFormatFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context); + +protected: + virtual void updateConfiguration(ContextPtr local_context); + + static std::unique_ptr createReadBufferIterator( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + ObjectInfos & read_keys, + const ContextPtr & context); + + ConfigurationPtr configuration; + const ObjectStoragePtr object_storage; + const std::optional format_settings; + const ASTPtr partition_by; + const bool distributed_processing; + + LoggerPtr log; +}; + +class StorageObjectStorage::Configuration +{ +public: + Configuration() = default; + Configuration(const Configuration & other); + virtual ~Configuration() = default; + + 
using Path = std::string; + using Paths = std::vector; + + static void initialize( + Configuration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure); + + /// Storage type: s3, hdfs, azure. + virtual std::string getTypeName() const = 0; + /// Engine name: S3, HDFS, Azure. + virtual std::string getEngineName() const = 0; + /// Sometimes object storages have something similar to chroot or namespace, for example + /// buckets in S3. If object storage doesn't have any namepaces return empty string. + virtual std::string getNamespaceType() const { return "namespace"; } + + virtual Path getPath() const = 0; + virtual void setPath(const Path & path) = 0; + + virtual const Paths & getPaths() const = 0; + virtual void setPaths(const Paths & paths) = 0; + + virtual String getDataSourceDescription() const = 0; + virtual String getNamespace() const = 0; + + virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0; + virtual void addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) = 0; + + bool withPartitionWildcard() const; + bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } + bool withGlobsIgnorePartitionWildcard() const; + bool isPathWithGlobs() const; + bool isNamespaceWithGlobs() const; + virtual std::string getPathWithoutGlobs() const; + + virtual bool isArchive() const { return false; } + bool isPathInArchiveWithGlobs() const; + virtual std::string getPathInArchive() const; + + virtual void check(ContextPtr context) const; + virtual void validateNamespace(const String & /* name */) const {} + + virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) = 0; + virtual ConfigurationPtr clone() = 0; + virtual bool isStaticConfiguration() const { return true; } + + String format = "auto"; + String compression_method = "auto"; + String structure = "auto"; + +protected: + virtual void fromNamedCollection(const NamedCollection & collection) = 0; + virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0; + + void assertInitialized() const; + + bool initialized = false; +}; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp new file mode 100644 index 00000000000..78f568d8ae2 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -0,0 +1,100 @@ +#include "Storages/ObjectStorage/StorageObjectStorageCluster.h" + +#include +#include +#include +#include + +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +StorageObjectStorageCluster::StorageObjectStorageCluster( + const String & cluster_name_, + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_) + : IStorageCluster( + cluster_name_, table_id_, getLogger(fmt::format("{}({})", configuration_->getEngineName(), table_id_.table_name))) + , configuration{configuration_} + , object_storage(object_storage_) +{ + ColumnsDescription columns{columns_}; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, {}, context_); + configuration->check(context_); + + StorageInMemoryMetadata metadata; + metadata.setColumns(columns); + metadata.setConstraints(constraints_); + + 
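The glob and partition-wildcard helpers declared in `Configuration` above (and defined earlier in StorageObjectStorage.cpp) reduce to simple string predicates. A small self-contained sketch of their behaviour, not part of the patch:

```cpp
#include <iostream>
#include <string>

// Same predicates as the Configuration helpers, reduced to free functions over a path string.
static bool hasGlobs(const std::string & path)
{
    return path.find_first_of("*?{") != std::string::npos;
}

static bool hasPartitionWildcard(const std::string & path)
{
    return path.find("{_partition_id}") != std::string::npos;
}

static std::string pathWithoutGlobs(const std::string & path)
{
    return path.substr(0, path.find_first_of("*?{"));
}

int main()
{
    const std::string globbed = "data/2024-*/part-?.parquet";
    const std::string partitioned = "data/{_partition_id}/part.parquet";

    std::cout << hasGlobs(globbed) << '\n';                 // 1
    std::cout << pathWithoutGlobs(globbed) << '\n';         // "data/2024-"
    std::cout << hasPartitionWildcard(partitioned) << '\n'; // 1
    // A partition wildcard also counts as a glob character, which is why
    // withGlobsIgnorePartitionWildcard() strips the wildcard before re-checking.
    std::cout << hasGlobs(partitioned) << '\n';             // 1
}
```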
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); + setInMemoryMetadata(metadata); +} + +std::string StorageObjectStorageCluster::getName() const +{ + return configuration->getEngineName(); +} + +void StorageObjectStorageCluster::updateQueryToSendIfNeeded( + ASTPtr & query, + const DB::StorageSnapshotPtr & storage_snapshot, + const ContextPtr & context) +{ + ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); + if (!expression_list) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Expected SELECT query from table function {}, got '{}'", + configuration->getEngineName(), queryToString(query)); + } + + ASTs & args = expression_list->children; + const auto & structure = storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(); + if (args.empty()) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Unexpected empty list of arguments for {}Cluster table function", + configuration->getEngineName()); + } + + ASTPtr cluster_name_arg = args.front(); + args.erase(args.begin()); + configuration->addStructureAndFormatToArgs(args, structure, configuration->format, context); + args.insert(args.begin(), cluster_name_arg); +} + +RemoteQueryExecutor::Extension StorageObjectStorageCluster::getTaskIteratorExtension( + const ActionsDAG::Node * predicate, const ContextPtr & local_context) const +{ + auto iterator = StorageObjectStorageSource::createFileIterator( + configuration, object_storage, /* distributed_processing */false, local_context, + predicate, virtual_columns, nullptr, local_context->getFileProgressCallback()); + + auto callback = std::make_shared>([iterator]() mutable -> String + { + auto object_info = iterator->next(0); + if (object_info) + return object_info->getPath(); + else + return ""; + }); + return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; +} + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h new file mode 100644 index 00000000000..69fec2b3c77 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -0,0 +1,48 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class Context; + +class StorageObjectStorageCluster : public IStorageCluster +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + StorageObjectStorageCluster( + const String & cluster_name_, + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_); + + std::string getName() const override; + + bool supportsSubcolumns() const override { return true; } + + bool supportsDynamicSubcolumns() const override { return true; } + + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } + + RemoteQueryExecutor::Extension getTaskIteratorExtension( + const ActionsDAG::Node * predicate, const ContextPtr & context) const override; + +private: + void updateQueryToSendIfNeeded( + ASTPtr & query, + const StorageSnapshotPtr & storage_snapshot, + const ContextPtr & context) override; + + const String engine_name; + const StorageObjectStorage::ConfigurationPtr configuration; + const ObjectStoragePtr object_storage; + NamesAndTypesList virtual_columns; +}; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp 
b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp new file mode 100644 index 00000000000..0a3cf19a590 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -0,0 +1,168 @@ +#include "StorageObjectStorageSink.h" +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int CANNOT_PARSE_TEXT; + extern const int BAD_ARGUMENTS; +} + +StorageObjectStorageSink::StorageObjectStorageSink( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + const std::optional & format_settings_, + const Block & sample_block_, + ContextPtr context, + const std::string & blob_path) + : SinkToStorage(sample_block_) + , sample_block(sample_block_) +{ + const auto & settings = context->getSettingsRef(); + const auto path = blob_path.empty() ? configuration->getPaths().back() : blob_path; + const auto chosen_compression_method = chooseCompressionMethod(path, configuration->compression_method); + + auto buffer = object_storage->writeObject( + StoredObject(path), WriteMode::Rewrite, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, context->getWriteSettings()); + + write_buf = wrapWriteBufferWithCompressionMethod( + std::move(buffer), + chosen_compression_method, + static_cast(settings.output_format_compression_level), + static_cast(settings.output_format_compression_zstd_window_log)); + + writer = FormatFactory::instance().getOutputFormatParallelIfPossible( + configuration->format, *write_buf, sample_block, context, format_settings_); +} + +void StorageObjectStorageSink::consume(Chunk chunk) +{ + std::lock_guard lock(cancel_mutex); + if (cancelled) + return; + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); +} + +void StorageObjectStorageSink::onCancel() +{ + std::lock_guard lock(cancel_mutex); + finalize(); + cancelled = true; +} + +void StorageObjectStorageSink::onException(std::exception_ptr exception) +{ + std::lock_guard lock(cancel_mutex); + try + { + std::rethrow_exception(exception); + } + catch (...) + { + /// An exception context is needed to proper delete write buffers without finalization. + release(); + } +} + +void StorageObjectStorageSink::onFinish() +{ + std::lock_guard lock(cancel_mutex); + finalize(); +} + +void StorageObjectStorageSink::finalize() +{ + if (!writer) + return; + + try + { + writer->finalize(); + writer->flush(); + write_buf->finalize(); + } + catch (...) + { + /// Stop ParallelFormattingOutputFormat correctly. 
+ release(); + throw; + } +} + +void StorageObjectStorageSink::release() +{ + writer.reset(); + write_buf.reset(); +} + +PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + std::optional format_settings_, + const Block & sample_block_, + ContextPtr context_, + const ASTPtr & partition_by) + : PartitionedSink(partition_by, context_, sample_block_) + , object_storage(object_storage_) + , configuration(configuration_) + , query_settings(configuration_->getQuerySettings(context_)) + , format_settings(format_settings_) + , sample_block(sample_block_) + , context(context_) +{ +} + +SinkPtr PartitionedStorageObjectStorageSink::createSinkForPartition(const String & partition_id) +{ + auto partition_bucket = replaceWildcards(configuration->getNamespace(), partition_id); + validateNamespace(partition_bucket); + + auto partition_key = replaceWildcards(configuration->getPath(), partition_id); + validateKey(partition_key); + + if (auto new_key = checkAndGetNewFileOnInsertIfNeeded( + *object_storage, *configuration, query_settings, partition_key, /* sequence_number */1)) + { + partition_key = *new_key; + } + + return std::make_shared( + object_storage, + configuration, + format_settings, + sample_block, + context, + partition_key + ); +} + +void PartitionedStorageObjectStorageSink::validateKey(const String & str) +{ + /// See: + /// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html + /// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject + + if (str.empty() || str.size() > 1024) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect key length (not empty, max 1023 characters), got: {}", str.size()); + + if (!UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in key"); + + validatePartitionKey(str, true); +} + +void PartitionedStorageObjectStorageSink::validateNamespace(const String & str) +{ + configuration->validateNamespace(str); + + if (!UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in bucket name"); + + validatePartitionKey(str, false); +} + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h new file mode 100644 index 00000000000..45cf83d606f --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -0,0 +1,70 @@ +#pragma once +#include +#include +#include + +namespace DB +{ +class StorageObjectStorageSink : public SinkToStorage +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + StorageObjectStorageSink( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + const std::optional & format_settings_, + const Block & sample_block_, + ContextPtr context, + const std::string & blob_path = ""); + + String getName() const override { return "StorageObjectStorageSink"; } + + void consume(Chunk chunk) override; + + void onCancel() override; + + void onException(std::exception_ptr exception) override; + + void onFinish() override; + +private: + const Block sample_block; + std::unique_ptr write_buf; + OutputFormatPtr writer; + bool cancelled = false; + std::mutex cancel_mutex; + + void finalize(); + void release(); +}; + +class PartitionedStorageObjectStorageSink : public PartitionedSink +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; 
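`createSinkForPartition` above derives one object key per partition by substituting the `{_partition_id}` wildcard and validating the result. A minimal sketch of that step (not part of the patch); `replacePartitionWildcard` is a simplified stand-in for `PartitionedSink::replaceWildcards`, and only the length check of `validateKey` is modelled:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

// Substitute every "{_partition_id}" occurrence in the configured path with the partition id.
static std::string replacePartitionWildcard(std::string path, const std::string & partition_id)
{
    static const std::string wildcard = "{_partition_id}";
    size_t pos = 0;
    while ((pos = path.find(wildcard, pos)) != std::string::npos)
    {
        path.replace(pos, wildcard.size(), partition_id);
        pos += partition_id.size();
    }
    return path;
}

// Mirrors the length check in validateKey (UTF-8 validation omitted here).
static void validateKeyLength(const std::string & key)
{
    if (key.empty() || key.size() > 1024)
        throw std::invalid_argument("Incorrect key length: " + std::to_string(key.size()));
}

int main()
{
    const std::string path_template = "events/{_partition_id}/data.parquet";
    const std::string key = replacePartitionWildcard(path_template, "2024-05");
    validateKeyLength(key);
    std::cout << key << '\n';  // events/2024-05/data.parquet
}
```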
+ + PartitionedStorageObjectStorageSink( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + std::optional format_settings_, + const Block & sample_block_, + ContextPtr context_, + const ASTPtr & partition_by); + + SinkPtr createSinkForPartition(const String & partition_id) override; + +private: + void validateKey(const String & str); + void validateNamespace(const String & str); + + ObjectStoragePtr object_storage; + ConfigurationPtr configuration; + + const StorageObjectStorage::QuerySettings query_settings; + const std::optional format_settings; + const Block sample_block; + const ContextPtr context; +}; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp new file mode 100644 index 00000000000..b31d0f8a92e --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -0,0 +1,783 @@ +#include "StorageObjectStorageSource.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +namespace ProfileEvents +{ + extern const Event EngineFileLikeReadFiles; +} + +namespace CurrentMetrics +{ + extern const Metric StorageObjectStorageThreads; + extern const Metric StorageObjectStorageThreadsActive; + extern const Metric StorageObjectStorageThreadsScheduled; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_COMPILE_REGEXP; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; + extern const int FILE_DOESNT_EXIST; +} + +StorageObjectStorageSource::StorageObjectStorageSource( + String name_, + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const ReadFromFormatInfo & info, + const std::optional & format_settings_, + ContextPtr context_, + UInt64 max_block_size_, + std::shared_ptr file_iterator_, + size_t max_parsing_threads_, + bool need_only_count_) + : SourceWithKeyCondition(info.source_header, false) + , WithContext(context_) + , name(std::move(name_)) + , object_storage(object_storage_) + , configuration(configuration_) + , format_settings(format_settings_) + , max_block_size(max_block_size_) + , need_only_count(need_only_count_) + , max_parsing_threads(max_parsing_threads_) + , read_from_format_info(info) + , create_reader_pool(std::make_shared( + CurrentMetrics::StorageObjectStorageThreads, + CurrentMetrics::StorageObjectStorageThreadsActive, + CurrentMetrics::StorageObjectStorageThreadsScheduled, + 1/* max_threads */)) + , columns_desc(info.columns_description) + , file_iterator(file_iterator_) + , schema_cache(StorageObjectStorage::getSchemaCache(context_, configuration->getTypeName())) + , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) +{ +} + +StorageObjectStorageSource::~StorageObjectStorageSource() +{ + create_reader_pool->wait(); +} + +void StorageObjectStorageSource::setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) +{ + setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.format_header); +} + +std::string StorageObjectStorageSource::getUniqueStoragePathIdentifier( + const Configuration & configuration, + const ObjectInfo & object_info, + bool include_connection_info) +{ + auto path = object_info.getPath(); + if (path.starts_with("/")) + path = path.substr(1); + + if (include_connection_info) + return fs::path(configuration.getDataSourceDescription()) / path; + else + return 
fs::path(configuration.getNamespace()) / path; +} + +std::shared_ptr StorageObjectStorageSource::createFileIterator( + ConfigurationPtr configuration, + ObjectStoragePtr object_storage, + bool distributed_processing, + const ContextPtr & local_context, + const ActionsDAG::Node * predicate, + const NamesAndTypesList & virtual_columns, + ObjectInfos * read_keys, + std::function file_progress_callback) +{ + if (distributed_processing) + return std::make_shared( + local_context->getReadTaskCallback(), + local_context->getSettingsRef().max_threads); + + if (configuration->isNamespaceWithGlobs()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Expression can not have wildcards inside {} name", configuration->getNamespaceType()); + + auto settings = configuration->getQuerySettings(local_context); + const bool is_archive = configuration->isArchive(); + + std::unique_ptr iterator; + if (configuration->isPathWithGlobs()) + { + /// Iterate through disclosed globs and make a source for each file + iterator = std::make_unique( + object_storage, configuration, predicate, virtual_columns, + local_context, is_archive ? nullptr : read_keys, settings.list_object_keys_size, + settings.throw_on_zero_files_match, file_progress_callback); + } + else + { + ConfigurationPtr copy_configuration = configuration->clone(); + auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); + if (filter_dag) + { + auto keys = configuration->getPaths(); + std::vector paths; + paths.reserve(keys.size()); + for (const auto & key : keys) + paths.push_back(fs::path(configuration->getNamespace()) / key); + VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context); + copy_configuration->setPaths(keys); + } + + iterator = std::make_unique( + object_storage, copy_configuration, virtual_columns, is_archive ? nullptr : read_keys, + settings.ignore_non_existent_file, file_progress_callback); + } + + if (is_archive) + { + return std::make_shared(object_storage, configuration, std::move(iterator), local_context, read_keys); + } + + return iterator; +} + +void StorageObjectStorageSource::lazyInitialize(size_t processor) +{ + if (initialized) + return; + + reader = createReader(processor); + if (reader) + reader_future = createReaderAsync(processor); + initialized = true; +} + +Chunk StorageObjectStorageSource::generate() +{ + lazyInitialize(0); + + while (true) + { + if (isCancelled() || !reader) + { + if (reader) + reader->cancel(); + break; + } + + Chunk chunk; + if (reader->pull(chunk)) + { + UInt64 num_rows = chunk.getNumRows(); + total_rows_in_file += num_rows; + + size_t chunk_size = 0; + if (const auto * input_format = reader.getInputFormat()) + chunk_size = input_format->getApproxBytesReadForChunk(); + + progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); + + const auto & object_info = reader.getObjectInfo(); + const auto & filename = object_info.getFileName(); + chassert(object_info.metadata); + VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( + chunk, + read_from_format_info.requested_virtual_columns, + getUniqueStoragePathIdentifier(*configuration, reader.getObjectInfo(), false), + object_info.metadata->size_bytes, &filename); + + return chunk; + } + + if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) + addNumRowsToCache(reader.getObjectInfo(), total_rows_in_file); + + total_rows_in_file = 0; + + assert(reader_future.valid()); + reader = reader_future.get(); + + if (!reader) + break; + + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. + create_reader_pool->wait(); + reader_future = createReaderAsync(); + } + + return {}; +} + +void StorageObjectStorageSource::addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows) +{ + const auto cache_key = getKeyForSchemaCache( + getUniqueStoragePathIdentifier(*configuration, object_info), + configuration->format, + format_settings, + getContext()); + schema_cache.addNumRows(cache_key, num_rows); +} + +std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfo & object_info) +{ + const auto cache_key = getKeyForSchemaCache( + getUniqueStoragePathIdentifier(*configuration, object_info), + configuration->format, + format_settings, + getContext()); + + auto get_last_mod_time = [&]() -> std::optional + { + return object_info.metadata + ? std::optional(object_info.metadata->last_modified.epochTime()) + : std::nullopt; + }; + return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); +} + +StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader(size_t processor) +{ + ObjectInfoPtr object_info; + auto query_settings = configuration->getQuerySettings(getContext()); + + do + { + object_info = file_iterator->next(processor); + + if (!object_info || object_info->getFileName().empty()) + return {}; + + if (!object_info->metadata) + { + const auto & path = object_info->isArchive() ? object_info->getPathToArchive() : object_info->getPath(); + object_info->metadata = object_storage->getObjectMetadata(path); + } + } + while (query_settings.skip_empty_files && object_info->metadata->size_bytes == 0); + + QueryPipelineBuilder builder; + std::shared_ptr source; + std::unique_ptr read_buf; + + std::optional num_rows_from_cache = need_only_count + && getContext()->getSettingsRef().use_cache_for_count_from_files + ? tryGetNumRowsFromCache(*object_info) + : std::nullopt; + + if (num_rows_from_cache) + { + /// We should not return single chunk with all number of rows, + /// because there is a chance that this chunk will be materialized later + /// (it can cause memory problems even with default values in columns or when virtual columns are requested). + /// Instead, we use special ConstChunkGenerator that will generate chunks + /// with max_block_size rows until total number of rows is reached. 
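A tiny model (not part of the patch) of the chunking behaviour the comment above describes: the cached row count is served in blocks of at most `max_block_size` rows rather than as one huge chunk:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Emits chunk sizes of at most max_block_size until the cached total is reached.
static std::vector<uint64_t> planChunkSizes(uint64_t total_rows, uint64_t max_block_size)
{
    std::vector<uint64_t> sizes;
    for (uint64_t emitted = 0; emitted < total_rows;)
    {
        const uint64_t rows = std::min(max_block_size, total_rows - emitted);
        sizes.push_back(rows);
        emitted += rows;
    }
    return sizes;
}

int main()
{
    // A cached count of 200'000 rows with a block size of 65'536
    // is served as 65'536 + 65'536 + 65'536 + 3'392 rows.
    for (auto rows : planChunkSizes(200000, 65536))
        std::cout << rows << '\n';
}
```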
+ builder.init(Pipe(std::make_shared( + read_from_format_info.format_header, *num_rows_from_cache, max_block_size))); + } + else + { + CompressionMethod compression_method; + if (const auto * object_info_in_archive = dynamic_cast(object_info.get())) + { + compression_method = chooseCompressionMethod(configuration->getPathInArchive(), configuration->compression_method); + const auto & archive_reader = object_info_in_archive->archive_reader; + read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); + } + else + { + compression_method = chooseCompressionMethod(object_info->getFileName(), configuration->compression_method); + read_buf = createReadBuffer(*object_info); + } + + auto input_format = FormatFactory::instance().getInput( + configuration->format, + *read_buf, + read_from_format_info.format_header, + getContext(), + max_block_size, + format_settings, + need_only_count ? 1 : max_parsing_threads, + std::nullopt, + true/* is_remote_fs */, + compression_method, + need_only_count); + + if (key_condition) + input_format->setKeyCondition(key_condition); + + if (need_only_count) + input_format->needOnlyCount(); + + builder.init(Pipe(input_format)); + + if (columns_desc.hasDefaults()) + { + builder.addSimpleTransform( + [&](const Block & header) + { + return std::make_shared(header, columns_desc, *input_format, getContext()); + }); + } + + source = input_format; + } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, read_from_format_info.requested_columns); + }); + + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + auto current_reader = std::make_unique(*pipeline); + + ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); + + return ReaderHolder( + object_info, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)); +} + +std::future StorageObjectStorageSource::createReaderAsync(size_t processor) +{ + return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); +} + +std::unique_ptr StorageObjectStorageSource::createReadBuffer(const ObjectInfo & object_info) +{ + const auto & object_size = object_info.metadata->size_bytes; + + auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); + read_settings.enable_filesystem_cache = false; + /// FIXME: Changing this setting to default value breaks something around parquet reading + read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size; + + const bool object_too_small = object_size <= 2 * getContext()->getSettings().max_download_buffer_size; + const bool use_prefetch = object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; + read_settings.remote_fs_method = use_prefetch ? RemoteFSReadMethod::threadpool : RemoteFSReadMethod::read; + /// User's object may change, don't cache it. + read_settings.use_page_cache_for_disks_without_file_cache = false; + + // Create a read buffer that will prefetch the first ~1 MB of the file. + // When reading lots of tiny files, this prefetching almost doubles the throughput. + // For bigger files, parallel reading is more useful. 
+ if (use_prefetch) + { + LOG_TRACE(log, "Downloading object of size {} with initial prefetch", object_size); + + auto async_reader = object_storage->readObjects( + StoredObjects{StoredObject{object_info.getPath(), /* local_path */ "", object_size}}, read_settings); + + async_reader->setReadUntilEnd(); + if (read_settings.remote_fs_prefetch) + async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); + + return async_reader; + } + else + { + /// FIXME: this is inconsistent that readObject always reads synchronously ignoring read_method setting. + return object_storage->readObject(StoredObject(object_info.getPath(), "", object_size), read_settings); + } +} + +StorageObjectStorageSource::IIterator::IIterator(const std::string & logger_name_) + : logger(getLogger(logger_name_)) +{ +} + +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::IIterator::next(size_t processor) +{ + auto object_info = nextImpl(processor); + + if (object_info) + { + LOG_TEST(logger, "Next key: {}", object_info->getFileName()); + } + + return object_info; +} + +StorageObjectStorageSource::GlobIterator::GlobIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const ActionsDAG::Node * predicate, + const NamesAndTypesList & virtual_columns_, + ContextPtr context_, + ObjectInfos * read_keys_, + size_t list_object_keys_size, + bool throw_on_zero_files_match_, + std::function file_progress_callback_) + : IIterator("GlobIterator") + , WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) + , virtual_columns(virtual_columns_) + , throw_on_zero_files_match(throw_on_zero_files_match_) + , read_keys(read_keys_) + , file_progress_callback(file_progress_callback_) +{ + if (configuration->isNamespaceWithGlobs()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name"); + } + else if (configuration->isPathWithGlobs()) + { + const auto key_with_globs = configuration_->getPath(); + const auto key_prefix = configuration->getPathWithoutGlobs(); + object_storage_iterator = object_storage->iterate(key_prefix, list_object_keys_size); + + matcher = std::make_unique(makeRegexpPatternFromGlobs(key_with_globs)); + if (!matcher->ok()) + { + throw Exception( + ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", key_with_globs, matcher->error()); + } + + recursive = key_with_globs == "/**"; + filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Using glob iterator with path without globs is not allowed (used path: {})", + configuration->getPath()); + } +} + +size_t StorageObjectStorageSource::GlobIterator::estimatedKeysCount() +{ + if (object_infos.empty() && !is_finished && object_storage_iterator->isValid()) + { + /// 1000 files were listed, and we cannot make any estimation of _how many more_ there are (because we list bucket lazily); + /// If there are more objects in the bucket, limiting the number of streams is the last thing we may want to do + /// as it would lead to serious slow down of the execution, since objects are going + /// to be fetched sequentially rather than in-parallel with up to times. 
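To make the intent of estimatedKeysCount concrete, here is a sketch (not the actual read-planning code) of how a caller could size its parallelism: the sentinel max() value means the estimate is unknown, so the requested stream count is left untouched.

#include <algorithm>
#include <cstddef>
#include <limits>

// Clamp the requested stream count by the estimated number of keys, but treat
// numeric_limits<size_t>::max() as "unknown, do not limit".
size_t chooseNumStreams(size_t requested_streams, size_t estimated_keys)
{
    if (estimated_keys == std::numeric_limits<size_t>::max())
        return requested_streams;
    return std::max<size_t>(1, std::min(requested_streams, estimated_keys));
}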
+ return std::numeric_limits::max(); + } + return object_infos.size(); +} + +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t processor) +{ + std::lock_guard lock(next_mutex); + auto object_info = nextImplUnlocked(processor); + if (first_iteration && !object_info && throw_on_zero_files_match) + { + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, + "Can not match any files with path {}", + configuration->getPath()); + } + first_iteration = false; + return object_info; +} + +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImplUnlocked(size_t /* processor */) +{ + bool current_batch_processed = object_infos.empty() || index >= object_infos.size(); + if (is_finished && current_batch_processed) + return {}; + + if (current_batch_processed) + { + ObjectInfos new_batch; + while (new_batch.empty()) + { + auto result = object_storage_iterator->getCurrentBatchAndScheduleNext(); + if (!result.has_value()) + { + is_finished = true; + return {}; + } + + new_batch = std::move(result.value()); + for (auto it = new_batch.begin(); it != new_batch.end();) + { + if (!recursive && !re2::RE2::FullMatch((*it)->getPath(), *matcher)) + it = new_batch.erase(it); + else + ++it; + } + } + + index = 0; + + if (filter_dag) + { + std::vector paths; + paths.reserve(new_batch.size()); + for (const auto & object_info : new_batch) + { + chassert(object_info); + paths.push_back(getUniqueStoragePathIdentifier(*configuration, *object_info, false)); + } + + VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); + LOG_TEST(logger, "Filtered files: {} -> {}", paths.size(), new_batch.size()); + } + + if (read_keys) + read_keys->insert(read_keys->end(), new_batch.begin(), new_batch.end()); + + object_infos = std::move(new_batch); + + if (file_progress_callback) + { + for (const auto & object_info : object_infos) + { + chassert(object_info->metadata); + file_progress_callback(FileProgress(0, object_info->metadata->size_bytes)); + } + } + } + + if (index >= object_infos.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); + + return object_infos[index++]; +} + +StorageObjectStorageSource::KeysIterator::KeysIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const NamesAndTypesList & virtual_columns_, + ObjectInfos * read_keys_, + bool ignore_non_existent_files_, + std::function file_progress_callback_) + : IIterator("KeysIterator") + , object_storage(object_storage_) + , configuration(configuration_) + , virtual_columns(virtual_columns_) + , file_progress_callback(file_progress_callback_) + , keys(configuration->getPaths()) + , ignore_non_existent_files(ignore_non_existent_files_) +{ + if (read_keys_) + { + /// TODO: should we add metadata if we anyway fetch it if file_progress_callback is passed? 
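The keys iterator below hands out work with an atomic index rather than a mutex. A self-contained sketch of that pattern, with invented names:

#include <atomic>
#include <cstddef>
#include <optional>
#include <string>
#include <utility>
#include <vector>

// Minimal sketch of the lock-free hand-out pattern: each caller atomically
// claims the next index until the list is exhausted.
class SharedKeyList
{
public:
    explicit SharedKeyList(std::vector<std::string> keys_) : keys(std::move(keys_)) {}

    std::optional<std::string> next()
    {
        const size_t i = index.fetch_add(1, std::memory_order_relaxed);
        if (i >= keys.size())
            return std::nullopt;
        return keys[i];
    }

private:
    const std::vector<std::string> keys;
    std::atomic<size_t> index{0};
};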
+ for (auto && key : keys) + { + auto object_info = std::make_shared(key); + read_keys_->emplace_back(object_info); + } + } +} + +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::KeysIterator::nextImpl(size_t /* processor */) +{ + while (true) + { + size_t current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= keys.size()) + return {}; + + auto key = keys[current_index]; + + ObjectMetadata object_metadata{}; + if (ignore_non_existent_files) + { + auto metadata = object_storage->tryGetObjectMetadata(key); + if (!metadata) + continue; + } + else + object_metadata = object_storage->getObjectMetadata(key); + + if (file_progress_callback) + file_progress_callback(FileProgress(0, object_metadata.size_bytes)); + + return std::make_shared(key, object_metadata); + } +} + +StorageObjectStorageSource::ReaderHolder::ReaderHolder( + ObjectInfoPtr object_info_, + std::unique_ptr read_buf_, + std::shared_ptr source_, + std::unique_ptr pipeline_, + std::unique_ptr reader_) + : object_info(std::move(object_info_)) + , read_buf(std::move(read_buf_)) + , source(std::move(source_)) + , pipeline(std::move(pipeline_)) + , reader(std::move(reader_)) +{ +} + +StorageObjectStorageSource::ReaderHolder & +StorageObjectStorageSource::ReaderHolder::operator=(ReaderHolder && other) noexcept +{ + /// The order of destruction is important. + /// reader uses pipeline, pipeline uses read_buf. + reader = std::move(other.reader); + pipeline = std::move(other.pipeline); + source = std::move(other.source); + read_buf = std::move(other.read_buf); + object_info = std::move(other.object_info); + return *this; +} + +StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( + const ReadTaskCallback & callback_, size_t max_threads_count) + : IIterator("ReadTaskIterator") + , callback(callback_) +{ + ThreadPool pool( + CurrentMetrics::StorageObjectStorageThreads, + CurrentMetrics::StorageObjectStorageThreadsActive, + CurrentMetrics::StorageObjectStorageThreadsScheduled, max_threads_count); + + auto pool_scheduler = threadPoolCallbackRunnerUnsafe(pool, "ReadTaskIter"); + + std::vector> keys; + keys.reserve(max_threads_count); + for (size_t i = 0; i < max_threads_count; ++i) + keys.push_back(pool_scheduler([this] { return callback(); }, Priority{})); + + pool.wait(); + buffer.reserve(max_threads_count); + for (auto & key_future : keys) + { + auto key = key_future.get(); + if (!key.empty()) + buffer.emplace_back(std::make_shared(key, std::nullopt)); + } +} + +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator::nextImpl(size_t) +{ + size_t current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= buffer.size()) + return std::make_shared(callback()); + + return buffer[current_index]; +} + +static IArchiveReader::NameFilter createArchivePathFilter(const std::string & archive_pattern) +{ + auto matcher = std::make_shared(makeRegexpPatternFromGlobs(archive_pattern)); + if (!matcher->ok()) + { + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", + archive_pattern, matcher->error()); + } + return [matcher](const std::string & p) mutable { return re2::RE2::FullMatch(p, *matcher); }; +} + +StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive::ObjectInfoInArchive( + ObjectInfoPtr archive_object_, + const std::string & path_in_archive_, + std::shared_ptr archive_reader_) + : archive_object(archive_object_) + , path_in_archive(path_in_archive_) + , archive_reader(archive_reader_) +{ 
+} + +StorageObjectStorageSource::ArchiveIterator::ArchiveIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + std::unique_ptr archives_iterator_, + ContextPtr context_, + ObjectInfos * read_keys_) + : IIterator("ArchiveIterator") + , WithContext(context_) + , object_storage(object_storage_) + , is_path_in_archive_with_globs(configuration_->isPathInArchiveWithGlobs()) + , archives_iterator(std::move(archives_iterator_)) + , filter(is_path_in_archive_with_globs ? createArchivePathFilter(configuration_->getPathInArchive()) : IArchiveReader::NameFilter{}) + , path_in_archive(is_path_in_archive_with_globs ? "" : configuration_->getPathInArchive()) + , read_keys(read_keys_) +{ +} + +std::shared_ptr +StorageObjectStorageSource::ArchiveIterator::createArchiveReader(ObjectInfoPtr object_info) const +{ + const auto size = object_info->metadata->size_bytes; + return DB::createArchiveReader( + /* path_to_archive */object_info->getPath(), + /* archive_read_function */[=, this]() + { + StoredObject stored_object(object_info->getPath(), "", size); + return object_storage->readObject(stored_object, getContext()->getReadSettings()); + }, + /* archive_size */size); +} + +StorageObjectStorageSource::ObjectInfoPtr +StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) +{ + std::unique_lock lock{next_mutex}; + while (true) + { + if (filter) + { + if (!file_enumerator) + { + archive_object = archives_iterator->next(processor); + if (!archive_object) + return {}; + + archive_reader = createArchiveReader(archive_object); + file_enumerator = archive_reader->firstFile(); + if (!file_enumerator) + continue; + } + else if (!file_enumerator->nextFile()) + { + file_enumerator.reset(); + continue; + } + + path_in_archive = file_enumerator->getFileName(); + if (!filter(path_in_archive)) + continue; + } + else + { + archive_object = archives_iterator->next(processor); + if (!archive_object) + return {}; + + if (!archive_object->metadata) + archive_object->metadata = object_storage->getObjectMetadata(archive_object->getPath()); + + archive_reader = createArchiveReader(archive_object); + if (!archive_reader->fileExists(path_in_archive)) + continue; + } + + auto object_in_archive = std::make_shared(archive_object, path_in_archive, archive_reader); + + if (read_keys != nullptr) + read_keys->push_back(object_in_archive); + + return object_in_archive; + } +} + +size_t StorageObjectStorageSource::ArchiveIterator::estimatedKeysCount() +{ + return archives_iterator->estimatedKeysCount(); +} + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h new file mode 100644 index 00000000000..fd7c7aa7102 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -0,0 +1,310 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +class SchemaCache; + +class StorageObjectStorageSource : public SourceWithKeyCondition, WithContext +{ + friend class StorageS3QueueSource; +public: + using Configuration = StorageObjectStorage::Configuration; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + using ObjectInfo = StorageObjectStorage::ObjectInfo; + using ObjectInfos = StorageObjectStorage::ObjectInfos; + using ObjectInfoPtr = StorageObjectStorage::ObjectInfoPtr; + + class IIterator; + class ReadTaskIterator; + class GlobIterator; + class KeysIterator; + class ArchiveIterator; + + StorageObjectStorageSource( + String name_, + 
ObjectStoragePtr object_storage_, + ConfigurationPtr configuration, + const ReadFromFormatInfo & info, + const std::optional & format_settings_, + ContextPtr context_, + UInt64 max_block_size_, + std::shared_ptr file_iterator_, + size_t max_parsing_threads_, + bool need_only_count_); + + ~StorageObjectStorageSource() override; + + String getName() const override { return name; } + + void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override; + + Chunk generate() override; + + static std::shared_ptr createFileIterator( + ConfigurationPtr configuration, + ObjectStoragePtr object_storage, + bool distributed_processing, + const ContextPtr & local_context, + const ActionsDAG::Node * predicate, + const NamesAndTypesList & virtual_columns, + ObjectInfos * read_keys, + std::function file_progress_callback = {}); + + static std::string getUniqueStoragePathIdentifier( + const Configuration & configuration, + const ObjectInfo & object_info, + bool include_connection_info = true); + +protected: + const String name; + ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; + const std::optional format_settings; + const UInt64 max_block_size; + const bool need_only_count; + const size_t max_parsing_threads; + const ReadFromFormatInfo read_from_format_info; + const std::shared_ptr create_reader_pool; + + ColumnsDescription columns_desc; + std::shared_ptr file_iterator; + SchemaCache & schema_cache; + bool initialized = false; + size_t total_rows_in_file = 0; + LoggerPtr log = getLogger("StorageObjectStorageSource"); + + struct ReaderHolder : private boost::noncopyable + { + public: + ReaderHolder( + ObjectInfoPtr object_info_, + std::unique_ptr read_buf_, + std::shared_ptr source_, + std::unique_ptr pipeline_, + std::unique_ptr reader_); + + ReaderHolder() = default; + ReaderHolder(ReaderHolder && other) noexcept { *this = std::move(other); } + ReaderHolder & operator=(ReaderHolder && other) noexcept; + + explicit operator bool() const { return reader != nullptr; } + PullingPipelineExecutor * operator->() { return reader.get(); } + const PullingPipelineExecutor * operator->() const { return reader.get(); } + + const ObjectInfo & getObjectInfo() const { return *object_info; } + const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } + + private: + ObjectInfoPtr object_info; + std::unique_ptr read_buf; + std::shared_ptr source; + std::unique_ptr pipeline; + std::unique_ptr reader; + }; + + ReaderHolder reader; + ThreadPoolCallbackRunnerUnsafe create_reader_scheduler; + std::future reader_future; + + /// Recreate ReadBuffer and Pipeline for each file. 
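The ReaderHolder declared above is move-only because its members form a dependency chain: the executor uses the pipeline and the pipeline uses the read buffer, so they must be released in that order. A toy sketch of the pattern with placeholder types (all names invented):

#include <memory>
#include <utility>

// Placeholder resource types standing in for read buffer, pipeline and executor.
struct Buffer {};
struct Pipeline {};
struct Executor {};

struct ToyHolder
{
    std::unique_ptr<Buffer> read_buf;     // lowest-level resource
    std::unique_ptr<Pipeline> pipeline;   // refers to read_buf
    std::unique_ptr<Executor> reader;     // refers to pipeline

    ToyHolder & operator=(ToyHolder && other) noexcept
    {
        // Assign (and thereby destroy the old) members top-down, so each object
        // is gone before anything it depends on is released.
        reader = std::move(other.reader);
        pipeline = std::move(other.pipeline);
        read_buf = std::move(other.read_buf);
        return *this;
    }
};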
+ ReaderHolder createReader(size_t processor = 0); + std::future createReaderAsync(size_t processor = 0); + std::unique_ptr createReadBuffer(const ObjectInfo & object_info); + + void addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows); + std::optional tryGetNumRowsFromCache(const ObjectInfo & object_info); + void lazyInitialize(size_t processor); +}; + +class StorageObjectStorageSource::IIterator +{ +public: + explicit IIterator(const std::string & logger_name_); + + virtual ~IIterator() = default; + + virtual size_t estimatedKeysCount() = 0; + + ObjectInfoPtr next(size_t processor); + +protected: + virtual ObjectInfoPtr nextImpl(size_t processor) = 0; + LoggerPtr logger; +}; + +class StorageObjectStorageSource::ReadTaskIterator : public IIterator +{ +public: + ReadTaskIterator(const ReadTaskCallback & callback_, size_t max_threads_count); + + size_t estimatedKeysCount() override { return buffer.size(); } + +private: + ObjectInfoPtr nextImpl(size_t) override; + + ReadTaskCallback callback; + ObjectInfos buffer; + std::atomic_size_t index = 0; +}; + +class StorageObjectStorageSource::GlobIterator : public IIterator, WithContext +{ +public: + GlobIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const ActionsDAG::Node * predicate, + const NamesAndTypesList & virtual_columns_, + ContextPtr context_, + ObjectInfos * read_keys_, + size_t list_object_keys_size, + bool throw_on_zero_files_match_, + std::function file_progress_callback_ = {}); + + ~GlobIterator() override = default; + + size_t estimatedKeysCount() override; + +private: + ObjectInfoPtr nextImpl(size_t processor) override; + ObjectInfoPtr nextImplUnlocked(size_t processor); + void createFilterAST(const String & any_key); + void fillBufferForKey(const std::string & uri_key); + + const ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; + const NamesAndTypesList virtual_columns; + const bool throw_on_zero_files_match; + + size_t index = 0; + + ObjectInfos object_infos; + ObjectInfos * read_keys; + ActionsDAGPtr filter_dag; + ObjectStorageIteratorPtr object_storage_iterator; + bool recursive{false}; + std::vector expanded_keys; + std::vector::iterator expanded_keys_iter; + + std::unique_ptr matcher; + + bool is_finished = false; + bool first_iteration = true; + std::mutex next_mutex; + + std::function file_progress_callback; +}; + +class StorageObjectStorageSource::KeysIterator : public IIterator +{ +public: + KeysIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const NamesAndTypesList & virtual_columns_, + ObjectInfos * read_keys_, + bool ignore_non_existent_files_, + std::function file_progress_callback = {}); + + ~KeysIterator() override = default; + + size_t estimatedKeysCount() override { return keys.size(); } + +private: + ObjectInfoPtr nextImpl(size_t processor) override; + + const ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; + const NamesAndTypesList virtual_columns; + const std::function file_progress_callback; + const std::vector keys; + std::atomic index = 0; + bool ignore_non_existent_files; +}; + +/* + * An archives iterator. + * Allows to iterate files inside one or many archives. + * `archives_iterator` is an iterator which iterates over different archives. + * There are two ways to read files in archives: + * 1. When we want to read one concete file in each archive. 
+ * In this case we go through all archives, check if this certain file + * exists within this archive and read it if it exists. + * 2. When we have a certain pattern of files we want to read in each archive. + * For this purpose we create a filter defined as IArchiveReader::NameFilter. + */ +class StorageObjectStorageSource::ArchiveIterator : public IIterator, private WithContext +{ +public: + explicit ArchiveIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + std::unique_ptr archives_iterator_, + ContextPtr context_, + ObjectInfos * read_keys_); + + size_t estimatedKeysCount() override; + + struct ObjectInfoInArchive : public ObjectInfo + { + ObjectInfoInArchive( + ObjectInfoPtr archive_object_, + const std::string & path_in_archive_, + std::shared_ptr archive_reader_); + + std::string getFileName() const override + { + return path_in_archive; + } + + std::string getPath() const override + { + return archive_object->getPath() + "::" + path_in_archive; + } + + std::string getPathToArchive() const override + { + return archive_object->getPath(); + } + + bool isArchive() const override { return true; } + + const ObjectInfoPtr archive_object; + const std::string path_in_archive; + const std::shared_ptr archive_reader; + }; + +private: + ObjectInfoPtr nextImpl(size_t processor) override; + std::shared_ptr createArchiveReader(ObjectInfoPtr object_info) const; + + const ObjectStoragePtr object_storage; + const bool is_path_in_archive_with_globs; + /// Iterator which iterates through different archives. + const std::unique_ptr archives_iterator; + /// Used when files inside archive are defined with a glob + const IArchiveReader::NameFilter filter = {}; + /// Current file inside the archive. + std::string path_in_archive = {}; + /// Read keys of files inside archives. + ObjectInfos * read_keys; + /// Object pointing to archive (NOT path within archive). + ObjectInfoPtr archive_object; + /// Reader of the archive. + std::shared_ptr archive_reader; + /// File enumerator inside the archive. + std::unique_ptr file_enumerator; + + std::mutex next_mutex; +}; + +} diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp new file mode 100644 index 00000000000..e49e14d2a0c --- /dev/null +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -0,0 +1,76 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +std::optional checkAndGetNewFileOnInsertIfNeeded( + const IObjectStorage & object_storage, + const StorageObjectStorage::Configuration & configuration, + const StorageObjectStorage::QuerySettings & settings, + const String & key, + size_t sequence_number) +{ + if (settings.truncate_on_insert + || !object_storage.exists(StoredObject(key))) + return std::nullopt; + + if (settings.create_new_file_on_insert) + { + auto pos = key.find_first_of('.'); + String new_key; + do + { + new_key = key.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : key.substr(pos)); + ++sequence_number; + } + while (object_storage.exists(StoredObject(new_key))); + + return new_key; + } + + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Object in bucket {} with key {} already exists. 
" + "If you want to overwrite it, enable setting {}_truncate_on_insert, if you " + "want to create a new file on each insert, enable setting {}_create_new_file_on_insert", + configuration.getNamespace(), key, configuration.getTypeName(), configuration.getTypeName()); +} + +void resolveSchemaAndFormat( + ColumnsDescription & columns, + std::string & format, + ObjectStoragePtr object_storage, + const StorageObjectStorage::ConfigurationPtr & configuration, + std::optional format_settings, + const ContextPtr & context) +{ + if (columns.empty()) + { + if (format == "auto") + std::tie(columns, format) = + StorageObjectStorage::resolveSchemaAndFormatFromData(object_storage, configuration, format_settings, context); + else + columns = StorageObjectStorage::resolveSchemaFromData(object_storage, configuration, format_settings, context); + } + else if (format == "auto") + { + format = StorageObjectStorage::resolveFormatFromData(object_storage, configuration, format_settings, context); + } + + if (!columns.hasOnlyOrdinary()) + { + /// We don't allow special columns. + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Special columns are not supported for {} storage" + "like MATERIALIZED, ALIAS or EPHEMERAL", configuration->getTypeName()); + } +} + +} diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h new file mode 100644 index 00000000000..2077999df41 --- /dev/null +++ b/src/Storages/ObjectStorage/Utils.h @@ -0,0 +1,24 @@ +#pragma once +#include "StorageObjectStorage.h" + +namespace DB +{ + +class IObjectStorage; + +std::optional checkAndGetNewFileOnInsertIfNeeded( + const IObjectStorage & object_storage, + const StorageObjectStorage::Configuration & configuration, + const StorageObjectStorage::QuerySettings & settings, + const std::string & key, + size_t sequence_number); + +void resolveSchemaAndFormat( + ColumnsDescription & columns, + std::string & format, + ObjectStoragePtr object_storage, + const StorageObjectStorage::ConfigurationPtr & configuration, + std::optional format_settings, + const ContextPtr & context); + +} diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp new file mode 100644 index 00000000000..bf595b2f5d4 --- /dev/null +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -0,0 +1,157 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +#if USE_AWS_S3 || USE_AZURE_BLOB_STORAGE || USE_HDFS + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +static std::shared_ptr createStorageObjectStorage( + const StorageFactory::Arguments & args, + StorageObjectStorage::ConfigurationPtr configuration, + ContextPtr context) +{ + auto & engine_args = args.engine_args; + if (engine_args.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); + + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, context, false); + + // Use format settings from global server context + settings from + // the SETTINGS clause of the create query. Settings from current + // session and user are ignored. + std::optional format_settings; + if (args.storage_def->settings) + { + FormatFactorySettings user_format_settings; + + // Apply changed settings from global context, but ignore the + // unknown ones, because we only have the format settings here. 
+ const auto & changes = context->getSettingsRef().changes(); + for (const auto & change : changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.set(change.name, change.value); + } + + // Apply changes from SETTINGS clause, with validation. + user_format_settings.applyChanges(args.storage_def->settings->changes); + format_settings = getFormatSettings(context, user_format_settings); + } + else + { + format_settings = getFormatSettings(context); + } + + ASTPtr partition_by; + if (args.storage_def->partition_by) + partition_by = args.storage_def->partition_by->clone(); + + return std::make_shared( + configuration, + configuration->createObjectStorage(context, /* is_readonly */false), + args.getContext(), + args.table_id, + args.columns, + args.constraints, + args.comment, + format_settings, + /* distributed_processing */ false, + partition_by); +} + +#endif + +#if USE_AZURE_BLOB_STORAGE +void registerStorageAzure(StorageFactory & factory) +{ + factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + return createStorageObjectStorage(args, configuration, args.getLocalContext()); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::AZURE, + }); +} +#endif + +#if USE_AWS_S3 +void registerStorageS3Impl(const String & name, StorageFactory & factory) +{ + factory.registerStorage(name, [=](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + return createStorageObjectStorage(args, configuration, args.getLocalContext()); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} + +void registerStorageS3(StorageFactory & factory) +{ + registerStorageS3Impl("S3", factory); +} + +void registerStorageCOS(StorageFactory & factory) +{ + registerStorageS3Impl("COSN", factory); +} + +void registerStorageOSS(StorageFactory & factory) +{ + registerStorageS3Impl("OSS", factory); +} + +#endif + +#if USE_HDFS +void registerStorageHDFS(StorageFactory & factory) +{ + factory.registerStorage("HDFS", [=](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + return createStorageObjectStorage(args, configuration, args.getLocalContext()); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::HDFS, + }); +} +#endif + +void registerStorageObjectStorage(StorageFactory & factory) +{ +#if USE_AWS_S3 + registerStorageS3(factory); + registerStorageCOS(factory); + registerStorageOSS(factory); +#endif +#if USE_AZURE_BLOB_STORAGE + registerStorageAzure(factory); +#endif +#if USE_HDFS + registerStorageHDFS(factory); +#endif + UNUSED(factory); +} + +} diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 7094578a9cc..0baa234e7a3 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -155,7 +155,7 @@ std::vector EmbeddedRocksDBBulkSink::squash(Chunk chunk) return {}; } -std::pair EmbeddedRocksDBBulkSink::serializeChunks(const std::vector & input_chunks) const +std::pair EmbeddedRocksDBBulkSink::serializeChunks(std::vector && input_chunks) const { auto serialized_key_column = ColumnString::create(); auto 
serialized_value_column = ColumnString::create(); @@ -168,7 +168,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali WriteBufferFromVector writer_key(serialized_key_data); WriteBufferFromVector writer_value(serialized_value_data); - for (const auto & chunk : input_chunks) + for (auto && chunk : input_chunks) { const auto & columns = chunk.getColumns(); auto rows = chunk.getNumRows(); @@ -193,13 +193,14 @@ std::pair EmbeddedRocksDBBulkSink::seriali void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) { - std::vector to_written = squash(std::move(chunk_)); + std::vector chunks_to_write = squash(std::move(chunk_)); - if (to_written.empty()) + if (chunks_to_write.empty()) return; - auto [serialized_key_column, serialized_value_column] = serializeChunks(to_written); + auto [serialized_key_column, serialized_value_column] = serializeChunks(std::move(chunks_to_write)); auto sst_file_path = getTemporarySSTFilePath(); + LOG_DEBUG(getLogger("EmbeddedRocksDBBulkSink"), "Writing {} rows to SST file {}", serialized_key_column->size(), sst_file_path); if (auto status = buildSSTFile(sst_file_path, *serialized_key_column, *serialized_value_column); !status.ok()) throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); @@ -209,6 +210,7 @@ void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) if (auto status = storage.rocksdb_ptr->IngestExternalFile({sst_file_path}, ingest_options); !status.ok()) throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); + LOG_DEBUG(getLogger("EmbeddedRocksDBBulkSink"), "SST file {} has been ingested", sst_file_path); if (fs::exists(sst_file_path)) (void)fs::remove(sst_file_path); } @@ -237,4 +239,5 @@ bool EmbeddedRocksDBBulkSink::isEnoughSize(const Chunk & chunk) const { return chunk.getNumRows() >= min_block_size_rows; } + } diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h index 19ce1e3b83e..46193b152ca 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -49,7 +49,7 @@ private: bool isEnoughSize(const std::vector & input_chunks) const; bool isEnoughSize(const Chunk & chunk) const; /// Serialize chunks to rocksdb key-value pairs - std::pair serializeChunks(const std::vector & input_chunks) const; + std::pair serializeChunks(std::vector && input_chunks) const; StorageEmbeddedRocksDB & storage; StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 1a9aa6d0f41..c3b7ae64c7e 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -189,6 +189,7 @@ StorageEmbeddedRocksDB::StorageEmbeddedRocksDB(const StorageID & table_id_, , rocksdb_dir(std::move(rocksdb_dir_)) , ttl(ttl_) , read_only(read_only_) + , log(getLogger(fmt::format("StorageEmbeddedRocksDB ({})", getStorageID().getNameForLogs()))) { setInMemoryMetadata(metadata_); setSettings(std::move(settings_)); @@ -316,6 +317,7 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt void StorageEmbeddedRocksDB::drop() { + std::lock_guard lock(rocksdb_ptr_mx); rocksdb_ptr->Close(); rocksdb_ptr = nullptr; } @@ -463,18 +465,13 @@ void StorageEmbeddedRocksDB::initDB() { rocksdb::DB * db; if (read_only) - { status = rocksdb::DB::OpenForReadOnly(merged, rocksdb_dir, &db); - } else - { status = rocksdb::DB::Open(merged, rocksdb_dir, &db); - } + if (!status.ok()) - { - throw 
Exception(ErrorCodes::ROCKSDB_ERROR, "Failed to open rocksdb path at: {}: {}", - rocksdb_dir, status.ToString()); - } + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Failed to open rocksdb path at: {}: {}", rocksdb_dir, status.ToString()); + rocksdb_ptr = std::unique_ptr(db); } } @@ -589,8 +586,12 @@ SinkToStoragePtr StorageEmbeddedRocksDB::write( const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context, bool /*async_insert*/) { if (getSettings().optimize_for_bulk_insert) + { + LOG_DEBUG(log, "Using bulk insert"); return std::make_shared(query_context, *this, metadata_snapshot); + } + LOG_DEBUG(log, "Using regular insert"); return std::make_shared(*this, metadata_snapshot); } diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 9fc58ea6b38..61592398954 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -124,5 +124,7 @@ private: bool read_only; void initDB(); + + LoggerPtr log; }; } diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index b5bee2cc8da..c8aaece0711 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -5,9 +5,9 @@ #include #include #include -#include #include #include +#include namespace CurrentMetrics @@ -31,11 +31,11 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -StorageS3QueueSource::S3QueueKeyWithInfo::S3QueueKeyWithInfo( +StorageS3QueueSource::S3QueueObjectInfo::S3QueueObjectInfo( const std::string & key_, - std::optional info_, + const ObjectMetadata & object_metadata_, Metadata::ProcessingNodeHolderPtr processing_holder_) - : StorageS3Source::KeyWithInfo(key_, info_) + : ObjectInfo(key_, object_metadata_) , processing_holder(processing_holder_) { } @@ -46,7 +46,8 @@ StorageS3QueueSource::FileIterator::FileIterator( size_t current_shard_, std::atomic & shutdown_called_, LoggerPtr logger_) - : metadata(metadata_) + : StorageObjectStorageSource::IIterator("S3QueueIterator") + , metadata(metadata_) , glob_iterator(std::move(glob_iterator_)) , shutdown_called(shutdown_called_) , log(logger_) @@ -56,15 +57,15 @@ StorageS3QueueSource::FileIterator::FileIterator( if (sharded_processing) { for (const auto & id : metadata->getProcessingIdsForShard(current_shard)) - sharded_keys.emplace(id, std::deque{}); + sharded_keys.emplace(id, std::deque{}); } } -StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(size_t idx) +StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::nextImpl(size_t processor) { while (!shutdown_called) { - KeyWithInfoPtr val{nullptr}; + ObjectInfoPtr val{nullptr}; { std::unique_lock lk(sharded_keys_mutex, std::defer_lock); @@ -74,36 +75,36 @@ StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(si /// we need to check sharded_keys and to next() under lock. 
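Sharded processing relies on every path mapping deterministically to exactly one processor. The real mapping is implemented by S3QueueFilesMetadata; the hash-modulo sketch below is only an assumption made for illustration.

#include <cstddef>
#include <functional>
#include <string>

// Assumption for illustration: assign each path to one of N processors via
// hash-modulo, which gives the deterministic mapping sharded processing needs.
size_t processorForPath(const std::string & path, size_t processors_count)
{
    return std::hash<std::string>{}(path) % processors_count;
}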
lk.lock(); - if (auto it = sharded_keys.find(idx); it != sharded_keys.end()) + if (auto it = sharded_keys.find(processor); it != sharded_keys.end()) { auto & keys = it->second; if (!keys.empty()) { val = keys.front(); keys.pop_front(); - chassert(idx == metadata->getProcessingIdForPath(val->key)); + chassert(processor == metadata->getProcessingIdForPath(val->relative_path)); } } else { throw Exception(ErrorCodes::LOGICAL_ERROR, "Processing id {} does not exist (Expected ids: {})", - idx, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); + processor, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); } } if (!val) { - val = glob_iterator->next(); + val = glob_iterator->next(processor); if (val && sharded_processing) { - const auto processing_id_for_key = metadata->getProcessingIdForPath(val->key); - if (idx != processing_id_for_key) + const auto processing_id_for_key = metadata->getProcessingIdForPath(val->relative_path); + if (processor != processing_id_for_key) { if (metadata->isProcessingIdBelongsToShard(processing_id_for_key, current_shard)) { LOG_TEST(log, "Putting key {} into queue of processor {} (total: {})", - val->key, processing_id_for_key, sharded_keys.size()); + val->relative_path, processing_id_for_key, sharded_keys.size()); if (auto it = sharded_keys.find(processing_id_for_key); it != sharded_keys.end()) { @@ -131,25 +132,25 @@ StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(si return {}; } - auto processing_holder = metadata->trySetFileAsProcessing(val->key); + auto processing_holder = metadata->trySetFileAsProcessing(val->relative_path); if (shutdown_called) { LOG_TEST(log, "Shutdown was called, stopping file iterator"); return {}; } - LOG_TEST(log, "Checking if can process key {} for processing_id {}", val->key, idx); + LOG_TEST(log, "Checking if can process key {} for processing_id {}", val->relative_path, processor); if (processing_holder) { - return std::make_shared(val->key, val->info, processing_holder); + return std::make_shared(val->relative_path, val->metadata.value(), processing_holder); } else if (sharded_processing - && metadata->getFileStatus(val->key)->state == S3QueueFilesMetadata::FileStatus::State::Processing) + && metadata->getFileStatus(val->relative_path)->state == S3QueueFilesMetadata::FileStatus::State::Processing) { throw Exception(ErrorCodes::LOGICAL_ERROR, "File {} is processing by someone else in sharded processing. " - "It is a bug", val->key); + "It is a bug", val->relative_path); } } return {}; @@ -163,7 +164,7 @@ size_t StorageS3QueueSource::FileIterator::estimatedKeysCount() StorageS3QueueSource::StorageS3QueueSource( String name_, const Block & header_, - std::unique_ptr internal_source_, + std::unique_ptr internal_source_, std::shared_ptr files_metadata_, size_t processing_id_, const S3QueueAction & action_, @@ -192,11 +193,6 @@ StorageS3QueueSource::StorageS3QueueSource( { } -StorageS3QueueSource::~StorageS3QueueSource() -{ - internal_source->create_reader_pool.wait(); -} - String StorageS3QueueSource::getName() const { return name; @@ -223,7 +219,7 @@ Chunk StorageS3QueueSource::generate() if (!reader) break; - const auto * key_with_info = dynamic_cast(&reader.getKeyWithInfo()); + const auto * key_with_info = dynamic_cast(&reader.getObjectInfo()); auto file_status = key_with_info->processing_holder->getFileStatus(); if (isCancelled()) @@ -239,15 +235,17 @@ Chunk StorageS3QueueSource::generate() catch (...) 
{ LOG_ERROR(log, "Failed to set file {} as failed: {}", - key_with_info->key, getCurrentExceptionMessage(true)); + key_with_info->relative_path, getCurrentExceptionMessage(true)); } - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, false); + appendLogElement(reader.getObjectInfo().getPath(), *file_status, processed_rows_from_file, false); } break; } + const auto & path = reader.getObjectInfo().getPath(); + if (shutdown_called) { if (processed_rows_from_file == 0) @@ -257,7 +255,7 @@ Chunk StorageS3QueueSource::generate() { LOG_DEBUG( log, "Table is being dropped, {} rows are already processed from {}, but file is not fully processed", - processed_rows_from_file, reader.getFile()); + processed_rows_from_file, path); try { @@ -266,10 +264,10 @@ Chunk StorageS3QueueSource::generate() catch (...) { LOG_ERROR(log, "Failed to set file {} as failed: {}", - key_with_info->key, getCurrentExceptionMessage(true)); + key_with_info->relative_path, getCurrentExceptionMessage(true)); } - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, false); + appendLogElement(path, *file_status, processed_rows_from_file, false); /// Leave the file half processed. Table is being dropped, so we do not care. break; @@ -277,7 +275,7 @@ Chunk StorageS3QueueSource::generate() LOG_DEBUG(log, "Shutdown called, but file {} is partially processed ({} rows). " "Will process the file fully and then shutdown", - reader.getFile(), processed_rows_from_file); + path, processed_rows_from_file); } auto * prev_scope = CurrentThread::get().attachProfileCountersScope(&file_status->profile_counters); @@ -291,30 +289,31 @@ Chunk StorageS3QueueSource::generate() Chunk chunk; if (reader->pull(chunk)) { - LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), reader.getPath()); + LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), path); file_status->processed_rows += chunk.getNumRows(); processed_rows_from_file += chunk.getNumRows(); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath(), reader.getKeyWithInfo().info->size); + VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( + chunk, requested_virtual_columns, path, reader.getObjectInfo().metadata->size_bytes); return chunk; } } catch (...) { const auto message = getCurrentExceptionMessage(true); - LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", reader.getFile(), message); + LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", path, message); files_metadata->setFileFailed(key_with_info->processing_holder, message); - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, false); + appendLogElement(path, *file_status, processed_rows_from_file, false); throw; } files_metadata->setFileProcessed(key_with_info->processing_holder); - applyActionAfterProcessing(reader.getFile()); + applyActionAfterProcessing(path); - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, true); + appendLogElement(path, *file_status, processed_rows_from_file, true); file_status.reset(); processed_rows_from_file = 0; @@ -330,11 +329,11 @@ Chunk StorageS3QueueSource::generate() if (!reader) break; - file_status = files_metadata->getFileStatus(reader.getFile()); + file_status = files_metadata->getFileStatus(reader.getObjectInfo().getPath()); /// Even if task is finished the thread may be not freed in pool. 
/// So wait until it will be freed before scheduling a new task. - internal_source->create_reader_pool.wait(); + internal_source->create_reader_pool->wait(); reader_future = internal_source->createReaderAsync(processing_id); } diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index a657459ed9d..663577e055b 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -5,7 +5,8 @@ #include #include #include -#include +#include +#include #include @@ -14,28 +15,35 @@ namespace Poco { class Logger; } namespace DB { +struct ObjectMetadata; + class StorageS3QueueSource : public ISource, WithContext { public: - using IIterator = StorageS3Source::IIterator; - using KeyWithInfoPtr = StorageS3Source::KeyWithInfoPtr; - using GlobIterator = StorageS3Source::DisclosedGlobIterator; + using Storage = StorageObjectStorage; + + using ConfigurationPtr = Storage::ConfigurationPtr; + using GlobIterator = StorageObjectStorageSource::GlobIterator; using ZooKeeperGetter = std::function; using RemoveFileFunc = std::function; using FileStatusPtr = S3QueueFilesMetadata::FileStatusPtr; + using ReaderHolder = StorageObjectStorageSource::ReaderHolder; using Metadata = S3QueueFilesMetadata; + using ObjectInfo = StorageObjectStorageSource::ObjectInfo; + using ObjectInfoPtr = std::shared_ptr; + using ObjectInfos = std::vector; - struct S3QueueKeyWithInfo : public StorageS3Source::KeyWithInfo + struct S3QueueObjectInfo : public ObjectInfo { - S3QueueKeyWithInfo( - const std::string & key_, - std::optional info_, - Metadata::ProcessingNodeHolderPtr processing_holder_); + S3QueueObjectInfo( + const std::string & key_, + const ObjectMetadata & object_metadata_, + Metadata::ProcessingNodeHolderPtr processing_holder_); Metadata::ProcessingNodeHolderPtr processing_holder; }; - class FileIterator : public IIterator + class FileIterator : public StorageObjectStorageSource::IIterator { public: FileIterator( @@ -48,7 +56,7 @@ public: /// Note: /// List results in s3 are always returned in UTF-8 binary order. 
/// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) - KeyWithInfoPtr next(size_t idx) override; + ObjectInfoPtr nextImpl(size_t processor) override; size_t estimatedKeysCount() override; @@ -61,14 +69,14 @@ public: const bool sharded_processing; const size_t current_shard; - std::unordered_map> sharded_keys; + std::unordered_map> sharded_keys; std::mutex sharded_keys_mutex; }; StorageS3QueueSource( String name_, const Block & header_, - std::unique_ptr internal_source_, + std::unique_ptr internal_source_, std::shared_ptr files_metadata_, size_t processing_id_, const S3QueueAction & action_, @@ -81,8 +89,6 @@ public: const StorageID & storage_id_, LoggerPtr log_); - ~StorageS3QueueSource() override; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); String getName() const override; @@ -94,7 +100,7 @@ private: const S3QueueAction action; const size_t processing_id; const std::shared_ptr files_metadata; - const std::shared_ptr internal_source; + const std::shared_ptr internal_source; const NamesAndTypesList requested_virtual_columns; const std::atomic & shutdown_called; const std::atomic & table_is_being_dropped; @@ -104,15 +110,14 @@ private: RemoveFileFunc remove_file_func; LoggerPtr log; - using ReaderHolder = StorageS3Source::ReaderHolder; ReaderHolder reader; std::future reader_future; std::atomic initialized{false}; size_t processed_rows_from_file = 0; - void lazyInitialize(); void applyActionAfterProcessing(const String & path); void appendLogElement(const std::string & filename, S3QueueFilesMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); + void lazyInitialize(); }; } diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/S3Queue/S3QueueTableMetadata.cpp index 1830bac4743..f0b7568ae7f 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueTableMetadata.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include namespace DB @@ -33,7 +33,7 @@ namespace S3QueueTableMetadata::S3QueueTableMetadata( - const StorageS3::Configuration & configuration, + const StorageObjectStorage::Configuration & configuration, const S3QueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata) { diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h index 84087f72a6a..bb8f8ccf2c4 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -3,7 +3,8 @@ #if USE_AWS_S3 #include -#include +#include +#include #include namespace DB @@ -27,7 +28,10 @@ struct S3QueueTableMetadata UInt64 s3queue_processing_threads_num = 1; S3QueueTableMetadata() = default; - S3QueueTableMetadata(const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata); + S3QueueTableMetadata( + const StorageObjectStorage::Configuration & configuration, + const S3QueueSettings & engine_settings, + const StorageInMemoryMetadata & storage_metadata); void read(const String & metadata_str); static S3QueueTableMetadata parse(const String & metadata_str); diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 16e42e32b8a..f8eb288921c 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include @@ -52,11 +54,6 @@ namespace 
ErrorCodes namespace { - bool containsGlobs(const S3::URI & url) - { - return url.key.find_first_of("*?{") != std::string::npos; - } - std::string chooseZooKeeperPath(const StorageID & table_id, const Settings & settings, const S3QueueSettings & s3queue_settings) { std::string zk_path_prefix = settings.s3queue_default_zookeeper_path.value; @@ -100,7 +97,7 @@ namespace StorageS3Queue::StorageS3Queue( std::unique_ptr s3queue_settings_, - const StorageS3::Configuration & configuration_, + const ConfigurationPtr configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -119,15 +116,15 @@ StorageS3Queue::StorageS3Queue( , reschedule_processing_interval_ms(s3queue_settings->s3queue_polling_min_timeout_ms) , log(getLogger("StorageS3Queue (" + table_id_.getFullTableName() + ")")) { - if (configuration.url.key.empty()) + if (configuration->getPath().empty()) { - configuration.url.key = "/*"; + configuration->setPath("/*"); } - else if (configuration.url.key.ends_with('/')) + else if (configuration->getPath().ends_with('/')) { - configuration.url.key += '*'; + configuration->setPath(configuration->getPath() + '*'); } - else if (!containsGlobs(configuration.url)) + else if (!configuration->isPathWithGlobs()) { throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue url must either end with '/' or contain globs"); } @@ -142,31 +139,20 @@ StorageS3Queue::StorageS3Queue( checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef()); - configuration.update(context_); - FormatFactory::instance().checkFormatName(configuration.format); - context_->getRemoteHostFilter().checkURL(configuration.url.uri); + object_storage = configuration->createObjectStorage(context_, /* is_readonly */true); + FormatFactory::instance().checkFormatName(configuration->format); + configuration->check(context_); + + ColumnsDescription columns{columns_}; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, context_); + configuration->check(context_); StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - ColumnsDescription columns; - if (configuration.format == "auto") - std::tie(columns, configuration.format) = StorageS3::getTableStructureAndFormatFromData(configuration, format_settings, context_); - else - columns = StorageS3::getTableStructureFromData(configuration, format_settings, context_); - storage_metadata.setColumns(columns); - } - else - { - if (configuration.format == "auto") - configuration.format = StorageS3::getTableStructureAndFormatFromData(configuration, format_settings, context_).second; - storage_metadata.setColumns(columns_); - } - + storage_metadata.setColumns(columns); storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setInMemoryMetadata(storage_metadata); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); task = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); }); @@ -235,7 +221,7 @@ void StorageS3Queue::drop() bool StorageS3Queue::supportsSubsetOfColumns(const ContextPtr & context_) const { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context_, format_settings); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context_, format_settings); } class 
ReadFromS3Queue : public SourceStepWithFilter @@ -364,44 +350,21 @@ std::shared_ptr StorageS3Queue::createSource( size_t max_block_size, ContextPtr local_context) { - auto configuration_snapshot = updateConfigurationAndGetCopy(local_context); - - auto internal_source = std::make_unique( - info, - configuration.format, + auto internal_source = std::make_unique( getName(), - local_context, + object_storage, + configuration, + info, format_settings, + local_context, max_block_size, - configuration_snapshot.request_settings, - configuration_snapshot.compression_method, - configuration_snapshot.client, - configuration_snapshot.url.bucket, - configuration_snapshot.url.version_id, - configuration_snapshot.url.uri.getHost() + std::to_string(configuration_snapshot.url.uri.getPort()), file_iterator, local_context->getSettingsRef().max_download_threads, false); - auto file_deleter = [this, bucket = configuration_snapshot.url.bucket, client = configuration_snapshot.client, blob_storage_log = BlobStorageLogWriter::create()](const std::string & path) mutable + auto file_deleter = [=, this](const std::string & path) mutable { - S3::DeleteObjectRequest request; - request.WithKey(path).WithBucket(bucket); - auto outcome = client->DeleteObject(request); - if (blob_storage_log) - blob_storage_log->addEvent( - BlobStorageLogElement::EventType::Delete, - bucket, path, {}, 0, outcome.IsSuccess() ? nullptr : &outcome.GetError()); - - if (!outcome.IsSuccess()) - { - const auto & err = outcome.GetError(); - LOG_ERROR(log, "{} (Code: {})", err.GetMessage(), static_cast(err.GetErrorType())); - } - else - { - LOG_TRACE(log, "Object with path {} was removed from S3", path); - } + object_storage->removeObject(StoredObject(path)); }; auto s3_queue_log = s3queue_settings->s3queue_enable_logging_to_s3queue_log ? 
local_context->getS3QueueLog() : nullptr; return std::make_shared( @@ -495,7 +458,6 @@ bool StorageS3Queue::streamToViews() auto s3queue_context = Context::createCopy(getContext()); s3queue_context->makeQueryContext(); - auto query_configuration = updateConfigurationAndGetCopy(s3queue_context); // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns @@ -530,12 +492,6 @@ bool StorageS3Queue::streamToViews() return rows > 0; } -StorageS3Queue::Configuration StorageS3Queue::updateConfigurationAndGetCopy(ContextPtr local_context) -{ - configuration.update(local_context); - return configuration; -} - zkutil::ZooKeeperPtr StorageS3Queue::getZooKeeper() const { return getContext()->getZooKeeper(); @@ -555,7 +511,7 @@ void StorageS3Queue::createOrCheckMetadata(const StorageInMemoryMetadata & stora } else { - std::string metadata = S3QueueTableMetadata(configuration, *s3queue_settings, storage_metadata).toString(); + std::string metadata = S3QueueTableMetadata(*configuration, *s3queue_settings, storage_metadata).toString(); requests.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent)); requests.emplace_back(zkutil::makeCreateRequest(zk_path / "processed", "", zkutil::CreateMode::Persistent)); requests.emplace_back(zkutil::makeCreateRequest(zk_path / "failed", "", zkutil::CreateMode::Persistent)); @@ -597,7 +553,7 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const String metadata_str = zookeeper->get(fs::path(zookeeper_prefix) / "metadata"); auto metadata_from_zk = S3QueueTableMetadata::parse(metadata_str); - S3QueueTableMetadata old_metadata(configuration, *s3queue_settings, storage_metadata); + S3QueueTableMetadata old_metadata(*configuration, *s3queue_settings, storage_metadata); old_metadata.checkEquals(metadata_from_zk); auto columns_from_zk = ColumnsDescription::parse(metadata_from_zk.columns); @@ -615,14 +571,9 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) { - auto glob_iterator = std::make_unique( - *configuration.client, - configuration.url, - predicate, - getVirtualsList(), - local_context, - /* read_keys */ nullptr, - configuration.request_settings); + auto settings = configuration->getQuerySettings(local_context); + auto glob_iterator = std::make_unique( + object_storage, configuration, predicate, getVirtualsList(), local_context, nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match); return std::make_shared( files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called, log); @@ -638,7 +589,8 @@ void registerStorageS3Queue(StorageFactory & factory) if (engine_args.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - auto configuration = StorageS3::getConfiguration(engine_args, args.getLocalContext()); + auto configuration = std::make_shared(); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getContext(), false); // Use format settings from global server context + settings from // the SETTINGS clause of the create query. 
Settings from current diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index 1f735b47819..83b7bc6667b 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -21,11 +21,11 @@ class S3QueueFilesMetadata; class StorageS3Queue : public IStorage, WithContext { public: - using Configuration = typename StorageS3::Configuration; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; StorageS3Queue( std::unique_ptr s3queue_settings_, - const Configuration & configuration_, + ConfigurationPtr configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -47,7 +47,7 @@ public: size_t max_block_size, size_t num_streams) override; - const auto & getFormatName() const { return configuration.format; } + const auto & getFormatName() const { return configuration->format; } const fs::path & getZooKeeperPath() const { return zk_path; } @@ -62,7 +62,8 @@ private: const S3QueueAction after_processing; std::shared_ptr files_metadata; - Configuration configuration; + ConfigurationPtr configuration; + ObjectStoragePtr object_storage; const std::optional format_settings; @@ -81,6 +82,7 @@ private: void drop() override; bool supportsSubsetOfColumns(const ContextPtr & context_) const; bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } std::shared_ptr createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate); std::shared_ptr createSource( @@ -96,7 +98,6 @@ private: void createOrCheckMetadata(const StorageInMemoryMetadata & storage_metadata); void checkTableStructure(const String & zookeeper_prefix, const StorageInMemoryMetadata & storage_metadata); - Configuration updateConfigurationAndGetCopy(ContextPtr local_context); }; } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp deleted file mode 100644 index 365f93cc324..00000000000 --- a/src/Storages/StorageAzureBlob.cpp +++ /dev/null @@ -1,1643 +0,0 @@ -#include - -#if USE_AZURE_BLOB_STORAGE -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include - -namespace fs = std::filesystem; - -using namespace Azure::Storage::Blobs; - -namespace CurrentMetrics -{ - extern const Metric ObjectStorageAzureThreads; - extern const Metric ObjectStorageAzureThreadsActive; - extern const Metric ObjectStorageAzureThreadsScheduled; -} - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int CANNOT_DETECT_FORMAT; - extern const int LOGICAL_ERROR; - extern const int NOT_IMPLEMENTED; -} - -namespace -{ - -const std::unordered_set required_configuration_keys = { - "blob_path", - "container", -}; - -const std::unordered_set optional_configuration_keys = { - "format", - "compression", - "structure", - 
"compression_method", - "account_name", - "account_key", - "connection_string", - "storage_account_url", -}; - -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); -} - -} - -void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); - - if (collection.has("connection_string")) - { - configuration.connection_url = collection.get("connection_string"); - configuration.is_connection_string = true; - } - - if (collection.has("storage_account_url")) - { - configuration.connection_url = collection.get("storage_account_url"); - configuration.is_connection_string = false; - } - - configuration.container = collection.get("container"); - configuration.blob_path = collection.get("blob_path"); - - if (collection.has("account_name")) - configuration.account_name = collection.get("account_name"); - - if (collection.has("account_key")) - configuration.account_key = collection.get("account_key"); - - configuration.structure = collection.getOrDefault("structure", "auto"); - configuration.format = collection.getOrDefault("format", configuration.format); - configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); -} - - -StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine_args, const ContextPtr & local_context) -{ - StorageAzureBlob::Configuration configuration; - - /// Supported signatures: - /// - /// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression]) - /// - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - processNamedCollectionResult(configuration, *named_collection); - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); - - return configuration; - } - - if (engine_args.size() < 3 || engine_args.size() > 7) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage AzureBlobStorage requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression])"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - std::unordered_map engine_args_to_idx; - - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - - auto is_format_arg = [] (const std::string & s) -> bool - { - return s == "auto" || FormatFactory::instance().exists(s); - }; - - if (engine_args.size() == 4) - { - //'c1 UInt64, c2 UInt64 - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format or account name specified without account key"); - } - } - else if 
(engine_args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - } - } - else if (engine_args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else - { - configuration.account_name = fourth_arg; - - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - } - } - else if (engine_args.size() == 7) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - } - } - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); - - return configuration; -} - - -AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(const ContextPtr & local_context) -{ - const auto & context_settings = local_context->getSettingsRef(); - auto settings_ptr = std::make_unique(); - settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; - settings_ptr->strict_upload_part_size = context_settings.azure_strict_upload_part_size; - settings_ptr->max_upload_part_size = context_settings.azure_max_upload_part_size; - settings_ptr->max_blocks_in_multipart_upload = context_settings.azure_max_blocks_in_multipart_upload; - settings_ptr->min_upload_part_size = context_settings.azure_min_upload_part_size; - settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); - - return settings_ptr; -} - -void registerStorageAzureBlob(StorageFactory & factory) -{ - factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = StorageAzureBlob::getConfiguration(engine_args, args.getLocalContext()); - auto client = StorageAzureBlob::createClient(configuration, /* is_read_only */ 
false); - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. - std::optional format_settings; - if (args.storage_def->settings) - { - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - } - - // Apply changes from SETTINGS clause, with validation. - user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - auto settings = StorageAzureBlob::createSettings(args.getContext()); - - return std::make_shared( - configuration, - std::make_unique("AzureBlobStorage", std::move(client), std::move(settings), configuration.container, configuration.getConnectionURL().toString()), - args.getContext(), - args.table_id, - args.columns, - args.constraints, - args.comment, - format_settings, - /* distributed_processing */ false, - partition_by); - }, - { - .supports_settings = true, - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::AZURE, - }); -} - -static bool containerExists(std::unique_ptr &blob_service_client, std::string container_name) -{ - Azure::Storage::Blobs::ListBlobContainersOptions options; - options.Prefix = container_name; - options.PageSizeHint = 1; - - auto containers_list_response = blob_service_client->ListBlobContainers(options); - auto containers_list = containers_list_response.BlobContainers; - - for (const auto & container : containers_list) - { - if (container_name == container.Name) - return true; - } - return false; -} - -AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container) -{ - AzureClientPtr result; - - if (configuration.is_connection_string) - { - std::shared_ptr managed_identity_credential = std::make_shared(); - std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); - result = std::make_unique(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); - - if (attempt_to_create_container) - { - bool container_exists = containerExists(blob_service_client,configuration.container); - if (!container_exists) - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - - try - { - result->CreateIfNotExists(); - } - catch (const Azure::Storage::StorageException & e) - { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) - { - throw; - } - } - } - } - } - else - { - std::shared_ptr storage_shared_key_credential; - if (configuration.account_name.has_value() && configuration.account_key.has_value()) - { - storage_shared_key_credential - = 
std::make_shared(*configuration.account_name, *configuration.account_key); - } - - std::unique_ptr blob_service_client; - size_t pos = configuration.connection_url.find('?'); - std::shared_ptr managed_identity_credential; - if (storage_shared_key_credential) - { - blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); - } - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, workload_identity_credential); - } - else - { - managed_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, managed_identity_credential); - } - } - - std::string final_url; - if (pos != std::string::npos) - { - auto url_without_sas = configuration.connection_url.substr(0, pos); - final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + configuration.container - + configuration.connection_url.substr(pos); - } - else - final_url - = configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container; - - if (!attempt_to_create_container) - { - if (storage_shared_key_credential) - return std::make_unique(final_url, storage_shared_key_credential); - else - return std::make_unique(final_url, managed_identity_credential); - } - - bool container_exists = containerExists(blob_service_client,configuration.container); - if (container_exists) - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - try - { - result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } - catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.") - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - throw; - } - } - } - } - - return result; -} - -Poco::URI StorageAzureBlob::Configuration::getConnectionURL() const -{ - if (!is_connection_string) - return Poco::URI(connection_url); - - auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); - return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); -} - -bool StorageAzureBlob::Configuration::withGlobsIgnorePartitionWildcard() const -{ - if (!withPartitionWildcard()) - return withGlobs(); - - return PartitionedSink::replaceWildcards(getPath(), "").find_first_of("*?{") != std::string::npos; -} - 
-StorageAzureBlob::StorageAzureBlob( - const Configuration & configuration_, - std::unique_ptr && object_storage_, - const ContextPtr & context, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , name("AzureBlobStorage") - , configuration(configuration_) - , object_storage(std::move(object_storage_)) - , distributed_processing(distributed_processing_) - , format_settings(format_settings_) - , partition_by(partition_by_) -{ - if (configuration.format != "auto") - FormatFactory::instance().checkFormatName(configuration.format); - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.getConnectionURL()); - - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - ColumnsDescription columns; - if (configuration.format == "auto") - std::tie(columns, configuration.format) = getTableStructureAndFormatFromData(object_storage.get(), configuration, format_settings, context); - else - columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context); - storage_metadata.setColumns(columns); - } - else - { - if (configuration.format == "auto") - configuration.format = getTableStructureAndFormatFromData(object_storage.get(), configuration, format_settings, context).second; - - /// We don't allow special columns in File storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine AzureBlobStorage doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); - - StoredObjects objects; - for (const auto & key : configuration.blobs_paths) - objects.emplace_back(key); -} - -void StorageAzureBlob::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) -{ - if (configuration.withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", - configuration.blob_path); - } - - StoredObjects objects; - for (const auto & key : configuration.blobs_paths) - objects.emplace_back(key); - - object_storage->removeObjectsIfExist(objects); -} - -namespace -{ - -class StorageAzureBlobSink : public SinkToStorage -{ -public: - StorageAzureBlobSink( - const String & format, - const Block & sample_block_, - const ContextPtr & context, - std::optional format_settings_, - const CompressionMethod compression_method, - AzureObjectStorage * object_storage, - const String & blob_path) - : SinkToStorage(sample_block_) - , sample_block(sample_block_) - , format_settings(format_settings_) - { - StoredObject object(blob_path); - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - object_storage->writeObject(object, WriteMode::Rewrite), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); - } - - String getName() const override { return 
"StorageAzureBlobSink"; } - - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. - release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - Block sample_block; - std::optional format_settings; - std::unique_ptr write_buf; - OutputFormatPtr writer; - bool cancelled = false; - std::mutex cancel_mutex; -}; - -namespace -{ - std::optional checkAndGetNewFileOnInsertIfNeeded(const ContextPtr & context, AzureObjectStorage * object_storage, const String & path, size_t sequence_number) - { - if (context->getSettingsRef().azure_truncate_on_insert || !object_storage->exists(StoredObject(path))) - return std::nullopt; - - if (context->getSettingsRef().azure_create_new_file_on_insert) - { - auto pos = path.find_first_of('.'); - String new_path; - do - { - new_path = path.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : path.substr(pos)); - ++sequence_number; - } - while (object_storage->exists(StoredObject(new_path))); - - return new_path; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object with key {} already exists. 
" - "If you want to overwrite it, enable setting azure_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting azure_create_new_file_on_insert", - path); - } -} - -class PartitionedStorageAzureBlobSink : public PartitionedSink, WithContext -{ -public: - PartitionedStorageAzureBlobSink( - const ASTPtr & partition_by, - const String & format_, - const Block & sample_block_, - const ContextPtr & context_, - std::optional format_settings_, - const CompressionMethod compression_method_, - AzureObjectStorage * object_storage_, - const String & blob_) - : PartitionedSink(partition_by, context_, sample_block_), WithContext(context_) - , format(format_) - , sample_block(sample_block_) - , compression_method(compression_method_) - , object_storage(object_storage_) - , blob(blob_) - , format_settings(format_settings_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto partition_key = replaceWildcards(blob, partition_id); - validateKey(partition_key); - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(getContext(), object_storage, partition_key, 1)) - partition_key = *new_path; - - return std::make_shared( - format, - sample_block, - getContext(), - format_settings, - compression_method, - object_storage, - partition_key - ); - } - -private: - const String format; - const Block sample_block; - const CompressionMethod compression_method; - AzureObjectStorage * object_storage; - const String blob; - const std::optional format_settings; - - ExpressionActionsPtr partition_by_expr; - - static void validateKey(const String & str) - { - validatePartitionKey(str, true); - } -}; - -} - -class ReadFromAzureBlob : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromAzureBlob"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromAzureBlob( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::shared_ptr storage_, - ReadFromFormatInfo info_, - const bool need_only_count_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) - , storage(std::move(storage_)) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - std::shared_ptr storage; - ReadFromFormatInfo info; - const bool need_only_count; - - size_t max_block_size; - const size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromAzureBlob::applyFilters(ActionDAGNodes added_filter_nodes) -{ - SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); - - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageAzureBlob::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr local_context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - if (partition_by && configuration.withPartitionWildcard()) - throw 
Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned Azure storage is not implemented yet"); - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && local_context->getSettingsRef().optimize_count_from_files; - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - local_context, - read_from_format_info.source_header, - std::move(this_ptr), - std::move(read_from_format_info), - need_only_count, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - const auto & configuration = storage->configuration; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared(context, - context->getReadTaskCallback()); - } - else if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blob_path, - predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); - } - else - { - iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blobs_paths, - predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); - } -} - -void ReadFromAzureBlob::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - const auto & configuration = storage->configuration; - Pipes pipes; - - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - configuration.format, - getName(), - context, - storage->format_settings, - max_block_size, - configuration.compression_method, - storage->object_storage.get(), - configuration.container, - configuration.connection_url, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageAzureBlob::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) -{ - if (configuration.withGlobsIgnorePartitionWildcard()) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", configuration.blob_path); - - auto path = configuration.blobs_paths.front(); - auto sample_block = metadata_snapshot->getSampleBlock(); - auto chosen_compression_method = chooseCompressionMethod(path, configuration.compression_method); - auto insert_query = std::dynamic_pointer_cast(query); - - auto partition_by_ast = insert_query ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && configuration.withPartitionWildcard(); - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - object_storage.get(), - path); - } - else - { - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(local_context, object_storage.get(), path, configuration.blobs_paths.size())) - { - configuration.blobs_paths.push_back(*new_path); - path = *new_path; - } - - return std::make_shared( - configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - object_storage.get(), - path); - } -} - -bool StorageAzureBlob::supportsPartitionBy() const -{ - return true; -} - -bool StorageAzureBlob::supportsSubsetOfColumns(const ContextPtr & context) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings); -} - -bool StorageAzureBlob::prefersLargeBlocks() const -{ - return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration.format); -} - -bool StorageAzureBlob::parallelizeOutputAfterReading(ContextPtr context) const -{ - return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context); -} - -StorageAzureBlobSource::GlobIterator::GlobIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - String blob_path_with_globs_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context_, - RelativePathsWithMetadata * outer_blobs_, - std::function file_progress_callback_) - : IIterator(context_) - , object_storage(object_storage_) - , container(container_) - , blob_path_with_globs(blob_path_with_globs_) - , virtual_columns(virtual_columns_) - , outer_blobs(outer_blobs_) - , file_progress_callback(file_progress_callback_) -{ - - const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. - if (key_prefix.size() == blob_path_with_globs.size()) - { - auto object_metadata = object_storage->getObjectMetadata(blob_path_with_globs); - blobs_with_metadata.emplace_back( - blob_path_with_globs, - object_metadata); - if (outer_blobs) - outer_blobs->emplace_back(blobs_with_metadata.back()); - if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - is_finished = true; - return; - } - - object_storage_iterator = object_storage->iterate(key_prefix); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(blob_path_with_globs)); - - if (!matcher->ok()) - throw Exception( - ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", blob_path_with_globs, matcher->error()); - - recursive = blob_path_with_globs == "/**" ? 
true : false; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); -} - -RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() -{ - std::lock_guard lock(next_mutex); - - if (is_finished && index >= blobs_with_metadata.size()) - { - return {}; - } - - bool need_new_batch = blobs_with_metadata.empty() || index >= blobs_with_metadata.size(); - - if (need_new_batch) - { - RelativePathsWithMetadata new_batch; - while (new_batch.empty()) - { - auto result = object_storage_iterator->getCurrrentBatchAndScheduleNext(); - if (result.has_value()) - { - new_batch = result.value(); - } - else - { - is_finished = true; - return {}; - } - - for (auto it = new_batch.begin(); it != new_batch.end();) - { - if (!recursive && !re2::RE2::FullMatch(it->relative_path, *matcher)) - it = new_batch.erase(it); - else - ++it; - } - } - - index = 0; - - if (filter_dag) - { - std::vector paths; - paths.reserve(new_batch.size()); - for (auto & path_with_metadata : new_batch) - paths.push_back(fs::path(container) / path_with_metadata.relative_path); - - VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); - } - - if (outer_blobs) - outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end()); - - blobs_with_metadata = std::move(new_batch); - if (file_progress_callback) - { - for (const auto & [relative_path, info] : blobs_with_metadata) - { - file_progress_callback(FileProgress(0, info.size_bytes)); - } - } - } - - size_t current_index = index++; - if (current_index >= blobs_with_metadata.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); - return blobs_with_metadata[current_index]; -} - -StorageAzureBlobSource::KeysIterator::KeysIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - const Strings & keys_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context_, - RelativePathsWithMetadata * outer_blobs, - std::function file_progress_callback) - : IIterator(context_) - , object_storage(object_storage_) - , container(container_) - , virtual_columns(virtual_columns_) -{ - Strings all_keys = keys_; - - ASTPtr filter_ast; - if (!all_keys.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - Strings paths; - paths.reserve(all_keys.size()); - for (const auto & key : all_keys) - paths.push_back(fs::path(container) / key); - - VirtualColumnUtils::filterByPathOrFile(all_keys, paths, filter_dag, virtual_columns, getContext()); - } - - for (auto && key : all_keys) - { - ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); - if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - keys.emplace_back(key, object_metadata); - } - - if (outer_blobs) - *outer_blobs = keys; -} - -RelativePathWithMetadata StorageAzureBlobSource::KeysIterator::next() -{ - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - - return keys[current_index]; -} - -Chunk StorageAzureBlobSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - 
chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, - requested_virtual_columns, - fs::path(container) / reader.getRelativePath(), - reader.getRelativePathWithMetadata().metadata.size_bytes); - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -void StorageAzureBlobSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - String source = fs::path(connection_url) / container / path; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional StorageAzureBlobSource::tryGetNumRowsFromCache(const DB::RelativePathWithMetadata & path_with_metadata) -{ - String source = fs::path(connection_url) / container / path_with_metadata.relative_path; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - auto last_mod = path_with_metadata.metadata.last_modified; - if (last_mod) - return last_mod->epochTime(); - return std::nullopt; - }; - - return StorageAzureBlob::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -StorageAzureBlobSource::StorageAzureBlobSource( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - const ContextPtr & context_, - std::optional format_settings_, - UInt64 max_block_size_, - String compression_hint_, - AzureObjectStorage * object_storage_, - const String & container_, - const String & connection_url_, - std::shared_ptr file_iterator_, - bool need_only_count_) - :ISource(info.source_header, false) - , WithContext(context_) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , format(format_) - , name(std::move(name_)) - , sample_block(info.format_header) - , format_settings(format_settings_) - , columns_desc(info.columns_description) - , max_block_size(max_block_size_) - , compression_hint(compression_hint_) - , object_storage(std::move(object_storage_)) - , container(container_) - , connection_url(connection_url_) - , file_iterator(file_iterator_) - , need_only_count(need_only_count_) - , create_reader_pool(CurrentMetrics::ObjectStorageAzureThreads, CurrentMetrics::ObjectStorageAzureThreadsActive, CurrentMetrics::ObjectStorageAzureThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "AzureReader")) -{ - reader = createReader(); - if (reader) - reader_future = createReaderAsync(); -} - - -StorageAzureBlobSource::~StorageAzureBlobSource() -{ - create_reader_pool.wait(); -} - -String StorageAzureBlobSource::getName() const -{ - return name; -} - -StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() -{ - auto path_with_metadata = file_iterator->next(); - if (path_with_metadata.relative_path.empty()) - return {}; - - if (path_with_metadata.metadata.size_bytes == 
0) - path_with_metadata.metadata = object_storage->getObjectMetadata(path_with_metadata.relative_path); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files - ? tryGetNumRowsFromCache(path_with_metadata) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - auto compression_method = chooseCompressionMethod(path_with_metadata.relative_path, compression_hint); - read_buf = createAzureReadBuffer(path_with_metadata.relative_path, path_with_metadata.metadata.size_bytes); - auto input_format = FormatFactory::instance().getInput( - format, *read_buf, sample_block, getContext(), max_block_size, - format_settings, max_parsing_threads, std::nullopt, - /* is_remote_fs */ true, compression_method); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{path_with_metadata, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -std::future StorageAzureBlobSource::createReaderAsync() -{ - return create_reader_scheduler([this] { return createReader(); }, Priority{}); -} - -std::unique_ptr StorageAzureBlobSource::createAzureReadBuffer(const String & key, size_t object_size) -{ - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; - const bool object_too_small = object_size <= 2 * download_buffer_size; - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. 
- if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - LOG_TRACE(log, "Downloading object of size {} from Azure with initial prefetch", object_size); - return createAsyncAzureReadBuffer(key, read_settings, object_size); - } - - return object_storage->readObject(StoredObject(key), read_settings, {}, object_size); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::shared_ptr & file_iterator_, - AzureObjectStorage * object_storage_, - std::optional format_, - const StorageAzureBlob::Configuration & configuration_, - const std::optional & format_settings_, - const RelativePathsWithMetadata & read_keys_, - const ContextPtr & context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , object_storage(object_storage_) - , configuration(configuration_) - , format(std::move(format_)) - , format_settings(format_settings_) - , read_keys(read_keys_) - , prev_read_keys_size(read_keys_.size()) - { - } - - Data next() override - { - /// For default mode check cached columns for currently read keys on first iteration. - if (first) - { - /// If format is unknown we iterate through all currently read keys on first iteration and - /// try to determine format by file name. - if (!format) - { - for (const auto & key : read_keys) - { - if (auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(key.relative_path)) - { - format = format_from_path; - break; - } - } - } - - /// For default mode check cached columns for currently read keys on first iteration. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns, format}; - } - } - - current_path_with_metadata = file_iterator->next(); - - if (current_path_with_metadata.relative_path.empty()) - { - if (first) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files with provided path " - "in AzureBlobStorage. You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in AzureBlobStorage. You can specify table structure manually"); - } - - return {nullptr, std::nullopt, format}; - } - - first = false; - - /// AzureBlobStorage file iterator could get new keys after new iteration. - if (read_keys.size() > prev_read_keys_size) - { - /// If format is unknown we can try to determine it by new file names. - if (!format) - { - for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it).relative_path)) - { - format = format_from_file_name; - break; - } - } - } - /// Check new files in schema cache if schema inference mode is default. 
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - if (columns_from_cache) - return {nullptr, columns_from_cache, format}; - } - - prev_read_keys_size = read_keys.size(); - } - - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - RelativePathsWithMetadata paths = {current_path_with_metadata}; - if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) - return {nullptr, columns_from_cache, format}; - } - - first = false; - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - return {wrapReadBufferWithCompressionMethod( - object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), - chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), - zstd_window_log_max), std::nullopt, format}; - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) - return; - - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; - Strings sources; - sources.reserve(read_keys.size()); - std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; }); - auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override { return current_path_with_metadata.relative_path; } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - return wrapReadBufferWithCompressionMethod( - object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), - chooseCompressionMethod(current_path_with_metadata.relative_path, 
configuration.compression_method), - zstd_window_log_max); - } - - private: - std::optional tryGetColumnsFromCache(const RelativePathsWithMetadata::const_iterator & begin, const RelativePathsWithMetadata::const_iterator & end) - { - auto context = getContext(); - if (!context->getSettingsRef().schema_inference_use_cache_for_azure) - return std::nullopt; - - auto & schema_cache = StorageAzureBlob::getSchemaCache(context); - for (auto it = begin; it < end; ++it) - { - auto get_last_mod_time = [&] -> std::optional - { - if (it->metadata.last_modified) - return it->metadata.last_modified->epochTime(); - return std::nullopt; - }; - - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; - String source = host_and_bucket + '/' + it->relative_path; - if (format) - { - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry for some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. - format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - std::shared_ptr file_iterator; - AzureObjectStorage * object_storage; - const StorageAzureBlob::Configuration & configuration; - std::optional format; - const std::optional & format_settings; - const RelativePathsWithMetadata & read_keys; - size_t prev_read_keys_size; - RelativePathWithMetadata current_path_with_metadata; - bool first = true; - }; -} - -std::pair StorageAzureBlob::getTableStructureAndFormatFromDataImpl( - std::optional format, - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx) -{ - RelativePathsWithMetadata read_keys; - std::shared_ptr file_iterator; - if (configuration.withGlobs()) - { - file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); - } - else - { - file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); - } - - ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, format, configuration, format_settings, read_keys, ctx); - if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, ctx); -} - -std::pair StorageAzureBlob::getTableStructureAndFormatFromData( - DB::AzureObjectStorage * object_storage, - const DB::StorageAzureBlob::Configuration & configuration, - const std::optional & format_settings, - const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, object_storage, configuration, format_settings, ctx); -} - -ColumnsDescription StorageAzureBlob::getTableStructureFromData( - DB::AzureObjectStorage * object_storage, - const DB::StorageAzureBlob::Configuration & configuration, - const 
std::optional & format_settings, - const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(configuration.format, object_storage, configuration, format_settings, ctx).first; -} - -SchemaCache & StorageAzureBlob::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_azure", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - - -std::unique_ptr StorageAzureBlobSource::createAsyncAzureReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size) -{ - auto modified_settings{read_settings}; - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - auto async_reader = object_storage->readObjects(StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, modified_settings); - - async_reader->setReadUntilEnd(); - if (read_settings.remote_fs_prefetch) - async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - return async_reader; -} - -} - -#endif diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h deleted file mode 100644 index b433cd92d68..00000000000 --- a/src/Storages/StorageAzureBlob.h +++ /dev/null @@ -1,347 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -class StorageAzureBlob : public IStorage -{ -public: - - using AzureClient = Azure::Storage::Blobs::BlobContainerClient; - using AzureClientPtr = std::unique_ptr; - - struct Configuration : public StatelessTableEngineConfiguration - { - Configuration() = default; - - String getPath() const { return blob_path; } - - bool update(const ContextPtr & context); - - bool withGlobs() const { return blob_path.find_first_of("*?{") != std::string::npos; } - - bool withPartitionWildcard() const - { - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return blobs_paths.back().find(PARTITION_ID_WILDCARD) != String::npos; - } - - bool withGlobsIgnorePartitionWildcard() const; - - Poco::URI getConnectionURL() const; - - std::string connection_url; - bool is_connection_string; - - std::optional account_name; - std::optional account_key; - - std::string container; - std::string blob_path; - std::vector blobs_paths; - }; - - StorageAzureBlob( - const Configuration & configuration_, - std::unique_ptr && object_storage_, - const ContextPtr & context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_); - - static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context); - static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container = true); - - static AzureObjectStorage::SettingsPtr createSettings(const ContextPtr & local_context); - - static void processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection); - - String getName() const override - { - return name; - } - - void read( - QueryPlan & query_plan, - const Names &, - const StorageSnapshotPtr &, - SelectQueryInfo &, - ContextPtr, - QueryProcessingStage::Enum, - size_t, - size_t) override; - - SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /* 
metadata_snapshot */, ContextPtr context, bool /*async_insert*/) override; - - void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; - - bool supportsPartitionBy() const override; - - bool supportsSubcolumns() const override { return true; } - - bool supportsSubsetOfColumns(const ContextPtr & context) const; - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - - bool prefersLargeBlocks() const override; - - bool parallelizeOutputAfterReading(ContextPtr context) const override; - - static SchemaCache & getSchemaCache(const ContextPtr & ctx); - - static ColumnsDescription getTableStructureFromData( - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx); - - static std::pair getTableStructureAndFormatFromData( - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx); - -private: - static std::pair getTableStructureAndFormatFromDataImpl( - std::optional format, - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx); - - friend class ReadFromAzureBlob; - - std::string name; - Configuration configuration; - std::unique_ptr object_storage; - - const bool distributed_processing; - std::optional format_settings; - ASTPtr partition_by; -}; - -class StorageAzureBlobSource : public ISource, WithContext -{ -public: - class IIterator : public WithContext - { - public: - explicit IIterator(const ContextPtr & context_):WithContext(context_) {} - virtual ~IIterator() = default; - virtual RelativePathWithMetadata next() = 0; - - RelativePathWithMetadata operator ()() { return next(); } - }; - - class GlobIterator : public IIterator - { - public: - GlobIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - String blob_path_with_globs_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context_, - RelativePathsWithMetadata * outer_blobs_, - std::function file_progress_callback_ = {}); - - RelativePathWithMetadata next() override; - ~GlobIterator() override = default; - - private: - AzureObjectStorage * object_storage; - std::string container; - String blob_path_with_globs; - ActionsDAGPtr filter_dag; - NamesAndTypesList virtual_columns; - - size_t index = 0; - - RelativePathsWithMetadata blobs_with_metadata; - RelativePathsWithMetadata * outer_blobs; - ObjectStorageIteratorPtr object_storage_iterator; - bool recursive{false}; - - std::unique_ptr matcher; - - void createFilterAST(const String & any_key); - bool is_finished = false; - std::mutex next_mutex; - - std::function file_progress_callback; - }; - - class ReadIterator : public IIterator - { - public: - explicit ReadIterator(const ContextPtr & context_, - const ReadTaskCallback & callback_) - : IIterator(context_), callback(callback_) { } - RelativePathWithMetadata next() override - { - return {callback(), {}}; - } - - private: - ReadTaskCallback callback; - }; - - class KeysIterator : public IIterator - { - public: - KeysIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - const Strings & keys_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context_, - 
RelativePathsWithMetadata * outer_blobs, - std::function file_progress_callback = {}); - - RelativePathWithMetadata next() override; - ~KeysIterator() override = default; - - private: - AzureObjectStorage * object_storage; - std::string container; - RelativePathsWithMetadata keys; - - ActionsDAGPtr filter_dag; - NamesAndTypesList virtual_columns; - - std::atomic index = 0; - }; - - StorageAzureBlobSource( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - const ContextPtr & context_, - std::optional format_settings_, - UInt64 max_block_size_, - String compression_hint_, - AzureObjectStorage * object_storage_, - const String & container_, - const String & connection_url_, - std::shared_ptr file_iterator_, - bool need_only_count_); - ~StorageAzureBlobSource() override; - - Chunk generate() override; - - String getName() const override; - -private: - void addNumRowsToCache(const String & path, size_t num_rows); - std::optional tryGetNumRowsFromCache(const RelativePathWithMetadata & path_with_metadata); - - NamesAndTypesList requested_columns; - NamesAndTypesList requested_virtual_columns; - String format; - String name; - Block sample_block; - std::optional format_settings; - ColumnsDescription columns_desc; - UInt64 max_block_size; - String compression_hint; - AzureObjectStorage * object_storage; - String container; - String connection_url; - std::shared_ptr file_iterator; - bool need_only_count; - size_t total_rows_in_file = 0; - - struct ReaderHolder - { - public: - ReaderHolder( - RelativePathWithMetadata relative_path_with_metadata_, - std::unique_ptr read_buf_, - std::shared_ptr source_, - std::unique_ptr pipeline_, - std::unique_ptr reader_) - : relative_path_with_metadata(std::move(relative_path_with_metadata_)) - , read_buf(std::move(read_buf_)) - , source(std::move(source_)) - , pipeline(std::move(pipeline_)) - , reader(std::move(reader_)) - { - } - - ReaderHolder() = default; - ReaderHolder(const ReaderHolder & other) = delete; - ReaderHolder & operator=(const ReaderHolder & other) = delete; - - ReaderHolder(ReaderHolder && other) noexcept - { - *this = std::move(other); - } - - ReaderHolder & operator=(ReaderHolder && other) noexcept - { - /// The order of destruction is important. - /// reader uses pipeline, pipeline uses read_buf. - reader = std::move(other.reader); - pipeline = std::move(other.pipeline); - source = std::move(other.source); - read_buf = std::move(other.read_buf); - relative_path_with_metadata = std::move(other.relative_path_with_metadata); - return *this; - } - - explicit operator bool() const { return reader != nullptr; } - PullingPipelineExecutor * operator->() { return reader.get(); } - const PullingPipelineExecutor * operator->() const { return reader.get(); } - const String & getRelativePath() const { return relative_path_with_metadata.relative_path; } - const RelativePathWithMetadata & getRelativePathWithMetadata() const { return relative_path_with_metadata; } - const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } - - private: - RelativePathWithMetadata relative_path_with_metadata; - std::unique_ptr read_buf; - std::shared_ptr source; - std::unique_ptr pipeline; - std::unique_ptr reader; - }; - - ReaderHolder reader; - - LoggerPtr log = getLogger("StorageAzureBlobSource"); - - ThreadPool create_reader_pool; - ThreadPoolCallbackRunnerUnsafe create_reader_scheduler; - std::future reader_future; - - /// Recreate ReadBuffer and Pipeline for each file. 
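The move assignment in `ReaderHolder` above encodes a dependency chain (reader uses pipeline, pipeline uses read_buf), so the members have to be replaced in reverse dependency order. A minimal standalone sketch of that pattern, using hypothetical stand-in types rather than the real pipeline classes:

```cpp
// Sketch only: when one member depends on another, move-assignment overwrites
// the dependent member first so the old dependent object is destroyed before
// the objects it points into (mirrors ReaderHolder::operator= above).
#include <memory>
#include <utility>

struct Buf {};                      // stands in for the read buffer
struct Pipe { Buf * buf = nullptr; };   // uses the buffer
struct Exec { Pipe * pipe = nullptr; }; // uses the pipeline

struct Holder
{
    std::unique_ptr<Buf>  buf;
    std::unique_ptr<Pipe> pipe;
    std::unique_ptr<Exec> exec;

    Holder & operator=(Holder && other) noexcept
    {
        /// Destroy (by overwriting) in reverse dependency order:
        /// exec first, then pipe, then buf.
        exec = std::move(other.exec);
        pipe = std::move(other.pipe);
        buf  = std::move(other.buf);
        return *this;
    }
};

int main()
{
    Holder a, b;
    b = std::move(a);   // members replaced in a safe order
}
```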
- ReaderHolder createReader(); - std::future createReaderAsync(); - - std::unique_ptr createAzureReadBuffer(const String & key, size_t object_size); - std::unique_ptr createAsyncAzureReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size); -}; - -} - -#endif diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp deleted file mode 100644 index a80d121567a..00000000000 --- a/src/Storages/StorageAzureBlobCluster.cpp +++ /dev/null @@ -1,91 +0,0 @@ -#include "Storages/StorageAzureBlobCluster.h" - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -StorageAzureBlobCluster::StorageAzureBlobCluster( - const String & cluster_name_, - const StorageAzureBlob::Configuration & configuration_, - std::unique_ptr && object_storage_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const ContextPtr & context) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageAzureBlobCluster (" + table_id_.table_name + ")")) - , configuration{configuration_} - , object_storage(std::move(object_storage_)) -{ - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL()); - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - ColumnsDescription columns; - /// `format_settings` is set to std::nullopt, because StorageAzureBlobCluster is used only as table function - if (configuration.format == "auto") - std::tie(columns, configuration.format) = StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context); - else - columns = StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context); - storage_metadata.setColumns(columns); - } - else - { - if (configuration.format == "auto") - configuration.format = StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context).second; - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -void StorageAzureBlobCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) -{ - ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); - if (!expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); - - TableFunctionAzureBlobStorageCluster::updateStructureAndFormatArgumentsIfNeeded( - expression_list->children, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), configuration.format, context); -} - -RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const -{ - auto iterator = std::make_shared( - object_storage.get(), configuration.container, configuration.blob_path, - predicate, getVirtualsList(), context, nullptr); - - auto callback = 
std::make_shared>([iterator]() mutable -> String{ return iterator->next().relative_path; }); - return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; -} - -} - -#endif diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h deleted file mode 100644 index eff4d70f1bd..00000000000 --- a/src/Storages/StorageAzureBlobCluster.h +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include - -#include "Client/Connection.h" -#include -#include -#include - -namespace DB -{ - -class Context; - -class StorageAzureBlobCluster : public IStorageCluster -{ -public: - StorageAzureBlobCluster( - const String & cluster_name_, - const StorageAzureBlob::Configuration & configuration_, - std::unique_ptr && object_storage_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const ContextPtr & context); - - std::string getName() const override { return "AzureBlobStorageCluster"; } - - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - - bool supportsSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - -private: - void updateBeforeRead(const ContextPtr & /*context*/) override {} - - void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; - - StorageAzureBlob::Configuration configuration; - std::unique_ptr object_storage; -}; - - -} - -#endif diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index d9a0b2b4d59..a3f6b6afc5d 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -302,6 +302,8 @@ void StorageBuffer::read( auto src_table_query_info = query_info; if (src_table_query_info.prewhere_info) { + src_table_query_info.prewhere_info = src_table_query_info.prewhere_info->clone(); + auto actions_dag = ActionsDAG::makeConvertingActions( header_after_adding_defaults.getColumnsWithTypeAndName(), header.getColumnsWithTypeAndName(), diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 6c15c7e0238..cd6dd7b933f 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -89,6 +89,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool /*async_insert*/) override; void startup() override; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 9882d744c29..fbb40f8b79f 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -700,7 +700,7 @@ static bool requiresObjectColumns(const ColumnsDescription & all_columns, ASTPtr auto name_in_storage = Nested::splitName(required_column).first; auto column_in_storage = all_columns.tryGetPhysical(name_in_storage); - if (column_in_storage && column_in_storage->type->hasDynamicSubcolumns()) + if (column_in_storage && column_in_storage->type->hasDynamicSubcolumnsDeprecated()) return true; } diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 3a7e63aef50..85a8de86953 100644 --- a/src/Storages/StorageDistributed.h +++ 
b/src/Storages/StorageDistributed.h @@ -85,6 +85,7 @@ public: bool supportsFinal() const override { return true; } bool supportsPrewhere() const override { return true; } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } StoragePolicyPtr getStoragePolicy() const override; diff --git a/src/Storages/StorageDummy.h b/src/Storages/StorageDummy.h index ae9bf2483e1..572dc07b269 100644 --- a/src/Storages/StorageDummy.h +++ b/src/Storages/StorageDummy.h @@ -26,6 +26,7 @@ public: } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } bool canMoveConditionsToPrewhere() const override { diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index f3c57ba88ed..37da59c3664 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -90,6 +90,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool prefersLargeBlocks() const override; bool parallelizeOutputAfterReading(ContextPtr context) const override; diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h index 973d595bbf0..f5a4362901e 100644 --- a/src/Storages/StorageFileCluster.h +++ b/src/Storages/StorageFileCluster.h @@ -32,6 +32,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } private: diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 306ae782d24..a5bae0acce5 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -628,7 +628,7 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns) const auto * available_type = it->getMapped(); - if (!available_type->hasDynamicSubcolumns() + if (!available_type->hasDynamicSubcolumnsDeprecated() && !column.type->equals(*available_type) && !isCompatibleEnumTypes(available_type, column.type.get())) throw Exception( @@ -676,7 +676,7 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns, const auto * provided_column_type = it->getMapped(); const auto * available_column_type = jt->getMapped(); - if (!provided_column_type->hasDynamicSubcolumns() + if (!provided_column_type->hasDynamicSubcolumnsDeprecated() && !provided_column_type->equals(*available_column_type) && !isCompatibleEnumTypes(available_column_type, provided_column_type)) throw Exception( @@ -720,7 +720,7 @@ void StorageInMemoryMetadata::check(const Block & block, bool need_all) const listOfColumns(available_columns)); const auto * available_type = it->getMapped(); - if (!available_type->hasDynamicSubcolumns() + if (!available_type->hasDynamicSubcolumnsDeprecated() && !column.type->equals(*available_type) && !isCompatibleEnumTypes(available_type, column.type.get())) throw Exception( diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 54b2d5ef6fb..08e0526550d 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -254,7 +254,7 @@ void LogSource::readData(const NameAndTypePair & name_and_type, ColumnPtr & colu if 
(!deserialize_states.contains(name)) { settings.getter = create_stream_getter(true); - serialization->deserializeBinaryBulkStatePrefix(settings, deserialize_states[name]); + serialization->deserializeBinaryBulkStatePrefix(settings, deserialize_states[name], nullptr); } settings.getter = create_stream_getter(false); diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index c7c80078efc..5ecd2ec3819 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -32,6 +32,7 @@ public: bool supportsFinal() const override { return getTargetTable()->supportsFinal(); } bool supportsParallelInsert() const override { return getTargetTable()->supportsParallelInsert(); } bool supportsSubcolumns() const override { return getTargetTable()->supportsSubcolumns(); } + bool supportsDynamicSubcolumns() const override { return getTargetTable()->supportsDynamicSubcolumns(); } bool supportsTransactions() const override { return getTargetTable()->supportsTransactions(); } SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 50581aa0d61..5d269cf814d 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -60,6 +60,7 @@ public: bool supportsParallelInsert() const override { return true; } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } /// Smaller blocks (e.g. 64K rows) are better for CPU cache. diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index a63ea1e32ef..735c8711a63 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -49,6 +49,7 @@ public: bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } bool supportsPrewhere() const override { return tableSupportsPrewhere(); } std::optional supportedPrewhereColumns() const override; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 0c2a2cc2f12..092c686f682 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -309,17 +309,21 @@ void StorageMergeTree::alter( auto table_id = getStorageID(); auto old_storage_settings = getSettings(); + const auto & query_settings = local_context->getSettingsRef(); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); - auto maybe_mutation_commands = commands.getMutationCommands(new_metadata, local_context->getSettingsRef().materialize_ttl_after_modify, local_context); + auto maybe_mutation_commands = commands.getMutationCommands(new_metadata, query_settings.materialize_ttl_after_modify, local_context); if (!maybe_mutation_commands.empty()) delayMutationOrThrowIfNeeded(nullptr, local_context); Int64 mutation_version = -1; commands.apply(new_metadata, local_context); + if (!query_settings.allow_suspicious_primary_key) + MergeTreeData::verifySortingKey(new_metadata.sorting_key); + /// This alter can be performed at new_metadata level only if (commands.isSettingsAlter()) { @@ -372,7 +376,7 @@ void StorageMergeTree::alter( 
resetObjectColumnsFromActiveParts(parts_lock); } - if (!maybe_mutation_commands.empty() && local_context->getSettingsRef().alter_sync > 0) + if (!maybe_mutation_commands.empty() && query_settings.alter_sync > 0) waitForMutation(mutation_version, false); } diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index f7ee936db8d..74abf931f8f 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -48,6 +48,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + SinkToStoragePtr write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, bool) override { return std::make_shared(metadata_snapshot->getSampleBlock()); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 199ba731f7b..c8e22106657 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -590,6 +590,9 @@ void StorageReplicatedMergeTree::waitMutationToFinishOnReplicas( LOG_DEBUG(log, "Waiting for {} to apply mutation {}", replica, mutation_id); zkutil::EventPtr wait_event = std::make_shared(); + constexpr size_t MAX_RETRIES_ON_FAILED_MUTATION = 30; + size_t retries_on_failed_mutation = 0; + while (!partial_shutdown_called) { /// Mutation maybe killed or whole replica was deleted. @@ -637,18 +640,32 @@ void StorageReplicatedMergeTree::waitMutationToFinishOnReplicas( } } - /// If mutation status is empty, than local replica may just not loaded it into memory. - if (mutation_status && !mutation_status->latest_fail_reason.empty()) - { - LOG_DEBUG(log, "Mutation {} is done {} or failed {} (status: '{}')", mutation_id, mutation_status->is_done, !mutation_status->latest_fail_reason.empty(), mutation_status->latest_fail_reason); - break; - } - /// Replica can become inactive, so wait with timeout, if nothing happened -> recheck it if (!wait_event->tryWait(1000)) { LOG_TRACE(log, "Failed to wait for mutation '{}', will recheck", mutation_id); } + + /// If mutation status is empty, than local replica may just not loaded it into memory. + if (mutation_status && !mutation_status->latest_fail_reason.empty()) + { + LOG_DEBUG(log, "Mutation {} is done {} or failed {} (status: '{}')", mutation_id, mutation_status->is_done, !mutation_status->latest_fail_reason.empty(), mutation_status->latest_fail_reason); + + /// In some cases latest_fail_reason may be retryable and there's a chance it will be cleared after the next attempt + if (++retries_on_failed_mutation <= MAX_RETRIES_ON_FAILED_MUTATION) + continue; + + if (mutation_status->is_done) + { + LOG_DEBUG(log, "Looks like mutation {} is done, rechecking", mutation_id); + continue; + } + + /// It's still possible that latest_fail_reason will be cleared just before queue.getIncompleteMutationsStatus(...) below, + /// but it's unlikely. 
Anyway, rethrow the exception here to avoid exiting with is_done=false + checkMutationStatus(mutation_status, {mutation_id}); + throw Exception(ErrorCodes::LOGICAL_ERROR, "checkMutationStatus didn't throw when checking status of {}: {}", mutation_id, mutation_status->latest_fail_reason); + } } /// This replica inactive, don't check anything @@ -5999,6 +6016,7 @@ void StorageReplicatedMergeTree::alter( assertNotReadonly(); auto table_id = getStorageID(); + const auto & query_settings = query_context->getSettingsRef(); if (commands.isSettingsAlter()) { @@ -6026,6 +6044,13 @@ void StorageReplicatedMergeTree::alter( return; } + if (!query_settings.allow_suspicious_primary_key) + { + StorageInMemoryMetadata future_metadata = getInMemoryMetadata(); + commands.apply(future_metadata, query_context); + + MergeTreeData::verifySortingKey(future_metadata.sorting_key); + } auto ast_to_str = [](ASTPtr query) -> String { @@ -6158,7 +6183,7 @@ void StorageReplicatedMergeTree::alter( auto maybe_mutation_commands = commands.getMutationCommands( *current_metadata, - query_context->getSettingsRef().materialize_ttl_after_modify, + query_settings.materialize_ttl_after_modify, query_context); bool have_mutation = !maybe_mutation_commands.empty(); @@ -6281,7 +6306,7 @@ void StorageReplicatedMergeTree::alter( { LOG_DEBUG(log, "Metadata changes applied. Will wait for data changes."); merge_selecting_task->schedule(); - waitMutation(*mutation_znode, query_context->getSettingsRef().alter_sync); + waitMutation(*mutation_znode, query_settings.alter_sync); LOG_DEBUG(log, "Data changes applied."); } } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 9d086e1dc37..f96206ce657 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -307,7 +307,7 @@ public: /// Get best replica having this partition on a same type remote disk String getSharedDataReplica(const IMergeTreeDataPart & part, const DataSourceDescription & data_source_description) const; - inline const String & getReplicaName() const { return replica_name; } + const String & getReplicaName() const { return replica_name; } /// Restores table metadata if ZooKeeper lost it. /// Used only on restarted readonly replicas (not checked). 
All active (Active) parts are moved to detached/ diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp deleted file mode 100644 index 2ce188c203c..00000000000 --- a/src/Storages/StorageS3.cpp +++ /dev/null @@ -1,2311 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "Common/logger_useful.h" -#include "IO/CompressionMethod.h" -#include "IO/ReadBuffer.h" -#include "Interpreters/Context_fwd.h" -#include "Storages/MergeTree/ReplicatedMergeTreePartHeader.h" - -#if USE_AWS_S3 - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#include -#pragma clang diagnostic pop - -namespace fs = std::filesystem; - - -namespace CurrentMetrics -{ - extern const Metric StorageS3Threads; - extern const Metric StorageS3ThreadsActive; - extern const Metric StorageS3ThreadsScheduled; -} - -namespace ProfileEvents -{ - extern const Event S3DeleteObjects; - extern const Event S3ListObjects; - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ - -static const std::unordered_set required_configuration_keys = { - "url", -}; -static const std::unordered_set optional_configuration_keys = { - "format", - "compression", - "compression_method", - "structure", - "access_key_id", - "secret_access_key", - "session_token", - "filename", - "use_environment_credentials", - "max_single_read_retries", - "min_upload_part_size", - "upload_part_size_multiply_factor", - "upload_part_size_multiply_parts_count_threshold", - "max_single_part_upload_size", - "max_connections", - "expiration_window_seconds", - "no_sign_request" -}; - -namespace ErrorCodes -{ - extern const int CANNOT_PARSE_TEXT; - extern const int BAD_ARGUMENTS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int S3_ERROR; - extern const int UNEXPECTED_EXPRESSION; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int CANNOT_DETECT_FORMAT; - extern const int NOT_IMPLEMENTED; - extern const int CANNOT_COMPILE_REGEXP; - extern const int FILE_DOESNT_EXIST; - extern const int NO_ELEMENTS_IN_CONFIG; -} - - -class ReadFromStorageS3Step : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromStorageS3Step"; } - - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromStorageS3Step( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - StorageS3 & storage_, - ReadFromFormatInfo read_from_format_info_, - bool need_only_count_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) - , column_names(column_names_) - , storage(storage_) - , 
read_from_format_info(std::move(read_from_format_info_)) - , need_only_count(need_only_count_) - , query_configuration(storage.getConfigurationCopy()) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - query_configuration.update(context); - virtual_columns = storage.getVirtualsList(); - } - -private: - Names column_names; - StorageS3 & storage; - ReadFromFormatInfo read_from_format_info; - bool need_only_count; - StorageS3::Configuration query_configuration; - NamesAndTypesList virtual_columns; - - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - - -class IOutputFormat; -using OutputFormatPtr = std::shared_ptr; - -class StorageS3Source::DisclosedGlobIterator::Impl : WithContext -{ -public: - Impl( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate_, - const NamesAndTypesList & virtual_columns_, - ContextPtr context_, - KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_, - std::function file_progress_callback_) - : WithContext(context_) - , client(client_.clone()) - , globbed_uri(globbed_uri_) - , predicate(predicate_) - , virtual_columns(virtual_columns_) - , read_keys(read_keys_) - , request_settings(request_settings_) - , list_objects_pool( - CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , list_objects_scheduler(threadPoolCallbackRunnerUnsafe(list_objects_pool, "ListObjects")) - , file_progress_callback(file_progress_callback_) - { - if (globbed_uri.bucket.find_first_of("*?{") != std::string::npos) - throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); - - expanded_keys = expandSelectionGlob(globbed_uri.key); - expanded_keys_iter = expanded_keys.begin(); - - fillBufferForKey(*expanded_keys_iter); - expanded_keys_iter++; - } - - KeyWithInfoPtr next(size_t) - { - std::lock_guard lock(mutex); - return nextAssumeLocked(); - } - - size_t objectsCount() - { - return buffer.size(); - } - - bool hasMore() - { - if (buffer.empty()) - return !(expanded_keys_iter == expanded_keys.end() && is_finished_for_key); - else - return true; - } - - ~Impl() - { - list_objects_pool.wait(); - } - -private: - using ListObjectsOutcome = Aws::S3::Model::ListObjectsV2Outcome; - - void fillBufferForKey(const std::string & uri_key) - { - is_finished_for_key = false; - const String key_prefix = uri_key.substr(0, uri_key.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. 
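The shortcut in `fillBufferForKey` above relies on a prefix check: if the key contains no glob characters, the prefix up to the first of `*?{` is the whole key and no ListObjects request is needed. A small illustrative helper, assuming nothing beyond the standard library:

```cpp
// Sketch of the "no globs, no listing" check: the key is used verbatim when
// the glob-free prefix covers the entire key.
#include <iostream>
#include <string>

static bool needsListing(const std::string & key)
{
    const std::string prefix = key.substr(0, key.find_first_of("*?{"));
    return prefix.size() != key.size();   // globs present, so the bucket must be listed
}

int main()
{
    std::cout << needsListing("data/part-0001.parquet") << '\n';  // 0: plain key
    std::cout << needsListing("data/part-*.parquet") << '\n';     // 1: glob, listing needed
}
```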
- if (key_prefix.size() == uri_key.size()) - { - buffer.clear(); - buffer.emplace_back(std::make_shared(uri_key, std::nullopt)); - buffer_iter = buffer.begin(); - if (read_keys) - read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); - is_finished_for_key = true; - return; - } - - request = {}; - request.SetBucket(globbed_uri.bucket); - request.SetPrefix(key_prefix); - request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); - - outcome_future = listObjectsAsync(); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(uri_key)); - if (!matcher->ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", uri_key, matcher->error()); - - recursive = globbed_uri.key == "/**"; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - fillInternalBufferAssumeLocked(); - } - - KeyWithInfoPtr nextAssumeLocked() - { - do - { - if (buffer_iter != buffer.end()) - { - auto answer = *buffer_iter; - ++buffer_iter; - - /// If url doesn't contain globs, we didn't list s3 bucket and didn't get object info for the key. - /// So we get object info lazily here on 'next()' request. - if (!answer->info) - { - try - { - answer->info = S3::getObjectInfo(*client, globbed_uri.bucket, answer->key, globbed_uri.version_id, request_settings); - } - catch (...) - { - /// if no such file AND there was no `{}` glob -- this is an exception - /// otherwise ignore it, this is acceptable - if (expanded_keys.size() == 1) - throw; - continue; - } - if (file_progress_callback) - file_progress_callback(FileProgress(0, answer->info->size)); - } - - return answer; - } - - if (is_finished_for_key) - { - if (expanded_keys_iter != expanded_keys.end()) - { - fillBufferForKey(*expanded_keys_iter); - expanded_keys_iter++; - continue; - } - else - return {}; - } - - try - { - fillInternalBufferAssumeLocked(); - } - catch (...) - { - /// In case of exception thrown while listing new batch of files - /// iterator may be partially initialized and its further using may lead to UB. - /// Iterator is used by several processors from several threads and - /// it may take some time for threads to stop processors and they - /// may still use this iterator after exception is thrown. - /// To avoid this UB, reset the buffer and return defaults for further calls. - is_finished_for_key = true; - buffer.clear(); - buffer_iter = buffer.begin(); - throw; - } - } while (true); - } - - void fillInternalBufferAssumeLocked() - { - buffer.clear(); - assert(outcome_future.valid()); - auto outcome = outcome_future.get(); - - if (!outcome.IsSuccess()) - { - throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", - quoteString(request.GetBucket()), quoteString(request.GetPrefix()), - backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); - } - - const auto & result_batch = outcome.GetResult().GetContents(); - - /// It returns false when all objects were returned - is_finished_for_key = !outcome.GetResult().GetIsTruncated(); - - if (!is_finished_for_key) - { - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. 
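The catch block in `nextAssumeLocked` above puts the iterator into a terminal state before rethrowing, so that later calls see a clean end-of-stream instead of a partially filled buffer. A simplified, self-contained sketch of that exception-safety pattern (hypothetical iterator, not the real S3 listing code):

```cpp
// Sketch: if refilling the internal buffer throws, mark the iterator finished
// and clear its state before rethrowing; subsequent calls return "no more keys".
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>

class KeyIterator
{
public:
    std::optional<std::string> next()
    {
        if (pos < buffer.size())
            return buffer[pos++];
        if (finished)
            return std::nullopt;

        try
        {
            refill();          // may throw while talking to the storage backend
        }
        catch (...)
        {
            finished = true;   // later calls must not touch half-initialized state
            buffer.clear();
            pos = 0;
            throw;
        }
        return next();
    }

private:
    void refill()
    {
        // Placeholder: pretend the second listing request fails.
        if (++batches == 2)
            throw std::runtime_error("listing failed");
        buffer = {"key-" + std::to_string(batches)};
        pos = 0;
    }

    std::vector<std::string> buffer;
    size_t pos = 0;
    size_t batches = 0;
    bool finished = false;
};

int main()
{
    KeyIterator it;
    try
    {
        while (auto key = it.next()) { /* process *key */ }
    }
    catch (const std::exception &) { /* listing error surfaced to the caller */ }

    auto done = it.next();   // std::nullopt: iterator stays in a clean finished state
    (void)done;
}
```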
- list_objects_pool.wait(); - outcome_future = listObjectsAsync(); - } - - if (request_settings.throw_on_zero_files_match && result_batch.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files using prefix {}", request.GetPrefix()); - - KeysWithInfo temp_buffer; - temp_buffer.reserve(result_batch.size()); - - for (const auto & row : result_batch) - { - String key = row.GetKey(); - if (recursive || re2::RE2::FullMatch(key, *matcher)) - { - S3::ObjectInfo info = - { - .size = size_t(row.GetSize()), - .last_modification_time = row.GetLastModified().Millis() / 1000, - }; - - temp_buffer.emplace_back(std::make_shared(std::move(key), std::move(info))); - } - } - - if (temp_buffer.empty()) - { - buffer_iter = buffer.begin(); - return; - } - - if (filter_dag) - { - std::vector paths; - paths.reserve(temp_buffer.size()); - for (const auto & key_with_info : temp_buffer) - paths.push_back(fs::path(globbed_uri.bucket) / key_with_info->key); - - VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, filter_dag, virtual_columns, getContext()); - } - - buffer = std::move(temp_buffer); - - if (file_progress_callback) - { - for (const auto & key_with_info : buffer) - file_progress_callback(FileProgress(0, key_with_info->info->size)); - } - - /// Set iterator only after the whole batch is processed - buffer_iter = buffer.begin(); - - if (read_keys) - read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); - } - - std::future listObjectsAsync() - { - return list_objects_scheduler([this] - { - ProfileEvents::increment(ProfileEvents::S3ListObjects); - auto outcome = client->ListObjectsV2(request); - - /// Outcome failure will be handled on the caller side. - if (outcome.IsSuccess()) - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - - return outcome; - }, Priority{}); - } - - std::mutex mutex; - - KeysWithInfo buffer; - KeysWithInfo::iterator buffer_iter; - - std::vector expanded_keys; - std::vector::iterator expanded_keys_iter; - - std::unique_ptr client; - S3::URI globbed_uri; - const ActionsDAG::Node * predicate; - ASTPtr query; - NamesAndTypesList virtual_columns; - ActionsDAGPtr filter_dag; - std::unique_ptr matcher; - bool recursive{false}; - bool is_finished_for_key{false}; - KeysWithInfo * read_keys; - - S3::ListObjectsV2Request request; - S3Settings::RequestSettings request_settings; - - ThreadPool list_objects_pool; - ThreadPoolCallbackRunnerUnsafe list_objects_scheduler; - std::future outcome_future; - std::function file_progress_callback; -}; - -StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context, - KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_, - std::function file_progress_callback_) - : pimpl(std::make_shared( - client_, globbed_uri_, predicate, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_)) -{ -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::DisclosedGlobIterator::next(size_t idx) /// NOLINT -{ - return pimpl->next(idx); -} - -size_t StorageS3Source::DisclosedGlobIterator::estimatedKeysCount() -{ - if (pimpl->hasMore()) - { - /// 1000 files were listed, and we cannot make any estimation of _how many more_ there are (because we list bucket lazily); - /// If there are more objects in the bucket, limiting the number of streams is the last thing we may want to do - 
/// as it would lead to serious slow down of the execution, since objects are going - /// to be fetched sequentially rather than in-parallel with up to times. - return std::numeric_limits::max(); - } - else - return pimpl->objectsCount(); -} - -class StorageS3Source::KeysIterator::Impl -{ -public: - explicit Impl( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys_, - std::function file_progress_callback_) - : keys(keys_) - , client(client_.clone()) - , version_id(version_id_) - , bucket(bucket_) - , request_settings(request_settings_) - , file_progress_callback(file_progress_callback_) - { - if (read_keys_) - { - for (const auto & key : keys) - read_keys_->push_back(std::make_shared(key)); - } - } - - KeyWithInfoPtr next(size_t) - { - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - auto key = keys[current_index]; - std::optional info; - if (file_progress_callback) - { - info = S3::getObjectInfo(*client, bucket, key, version_id, request_settings); - file_progress_callback(FileProgress(0, info->size)); - } - - return std::make_shared(key, info); - } - - size_t objectsCount() - { - return keys.size(); - } - -private: - Strings keys; - std::atomic_size_t index = 0; - std::unique_ptr client; - String version_id; - String bucket; - S3Settings::RequestSettings request_settings; - std::function file_progress_callback; -}; - -StorageS3Source::KeysIterator::KeysIterator( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys, - std::function file_progress_callback_) - : pimpl(std::make_shared( - client_, version_id_, keys_, bucket_, request_settings_, read_keys, file_progress_callback_)) -{ -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::KeysIterator::next(size_t idx) /// NOLINT -{ - return pimpl->next(idx); -} - -size_t StorageS3Source::KeysIterator::estimatedKeysCount() -{ - return pimpl->objectsCount(); -} - -StorageS3Source::ReadTaskIterator::ReadTaskIterator( - const DB::ReadTaskCallback & callback_, - size_t max_threads_count) - : callback(callback_) -{ - ThreadPool pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, max_threads_count); - auto pool_scheduler = threadPoolCallbackRunnerUnsafe(pool, "S3ReadTaskItr"); - - std::vector> keys; - keys.reserve(max_threads_count); - for (size_t i = 0; i < max_threads_count; ++i) - keys.push_back(pool_scheduler([this] { return callback(); }, Priority{})); - - pool.wait(); - buffer.reserve(max_threads_count); - for (auto & key_future : keys) - buffer.emplace_back(std::make_shared(key_future.get())); -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::ReadTaskIterator::next(size_t) /// NOLINT -{ - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= buffer.size()) - return std::make_shared(callback()); - - while (current_index < buffer.size()) - { - if (const auto & key_info = buffer[current_index]; key_info && !key_info->key.empty()) - return buffer[current_index]; - - current_index = index.fetch_add(1, std::memory_order_relaxed); - } - - return nullptr; -} - -size_t StorageS3Source::ReadTaskIterator::estimatedKeysCount() -{ - return buffer.size(); -} - - 
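`estimatedKeysCount` above returns an effectively unlimited estimate while the lazy listing is still in progress, so stream parallelism is not capped by an incomplete count. A compact sketch of that policy with an illustrative struct:

```cpp
// Sketch of the estimation policy: keys are listed lazily in batches, so an
// unfinished listing cannot bound the key count; SIZE_MAX means "unknown, assume many".
#include <cstddef>
#include <iostream>
#include <limits>

struct LazyListing
{
    size_t listed_so_far = 0;
    bool finished = false;

    size_t estimatedKeysCount() const
    {
        if (!finished)
            return std::numeric_limits<size_t>::max();  // do not limit the number of streams
        return listed_so_far;                           // exact once listing has ended
    }
};

int main()
{
    LazyListing partial{1000, false};
    LazyListing complete{42, true};
    std::cout << partial.estimatedKeysCount() << ' ' << complete.estimatedKeysCount() << '\n';
}
```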
-StorageS3Source::ArchiveIterator::ArchiveIterator( - std::unique_ptr basic_iterator_, - const std::string & archive_pattern_, - std::shared_ptr client_, - const String & bucket_, - const String & version_id_, - const S3Settings::RequestSettings & request_settings_, - ContextPtr context_, - KeysWithInfo * read_keys_) - : WithContext(context_) - , basic_iterator(std::move(basic_iterator_)) - , basic_key_with_info_ptr(nullptr) - , client(client_) - , bucket(bucket_) - , version_id(version_id_) - , request_settings(request_settings_) - , read_keys(read_keys_) -{ - if (archive_pattern_.find_first_of("*?{") != std::string::npos) - { - auto matcher = std::make_shared(makeRegexpPatternFromGlobs(archive_pattern_)); - if (!matcher->ok()) - throw Exception( - ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", archive_pattern_, matcher->error()); - filter = IArchiveReader::NameFilter{[matcher](const std::string & p) mutable { return re2::RE2::FullMatch(p, *matcher); }}; - } - else - { - path_in_archive = archive_pattern_; - } -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::ArchiveIterator::next(size_t) -{ - if (!path_in_archive.empty()) - { - std::unique_lock lock{take_next_mutex}; - while (true) - { - basic_key_with_info_ptr = basic_iterator->next(); - if (!basic_key_with_info_ptr) - return {}; - refreshArchiveReader(); - bool file_exists = archive_reader->fileExists(path_in_archive); - if (file_exists) - { - KeyWithInfoPtr archive_key_with_info - = std::make_shared(basic_key_with_info_ptr->key, std::nullopt, path_in_archive, archive_reader); - if (read_keys != nullptr) - read_keys->push_back(archive_key_with_info); - return archive_key_with_info; - } - } - } - else - { - std::unique_lock lock{take_next_mutex}; - while (true) - { - if (!file_enumerator) - { - basic_key_with_info_ptr = basic_iterator->next(); - if (!basic_key_with_info_ptr) - return {}; - refreshArchiveReader(); - file_enumerator = archive_reader->firstFile(); - if (!file_enumerator) - { - file_enumerator.reset(); - continue; - } - } - else if (!file_enumerator->nextFile()) - { - file_enumerator.reset(); - continue; - } - - String current_filename = file_enumerator->getFileName(); - bool satisfies = filter(current_filename); - if (satisfies) - { - KeyWithInfoPtr archive_key_with_info - = std::make_shared(basic_key_with_info_ptr->key, std::nullopt, current_filename, archive_reader); - if (read_keys != nullptr) - read_keys->push_back(archive_key_with_info); - return archive_key_with_info; - } - } - } -} - -size_t StorageS3Source::ArchiveIterator::estimatedKeysCount() -{ - return basic_iterator->estimatedKeysCount(); -} - -void StorageS3Source::ArchiveIterator::refreshArchiveReader() -{ - if (basic_key_with_info_ptr) - { - if (!basic_key_with_info_ptr->info) - { - basic_key_with_info_ptr->info = S3::getObjectInfo(*client, bucket, basic_key_with_info_ptr->key, version_id, request_settings); - } - archive_reader = createArchiveReader( - basic_key_with_info_ptr->key, - [key = basic_key_with_info_ptr->key, archive_size = basic_key_with_info_ptr->info.value().size, context = getContext(), this]() - { return createS3ReadBuffer(key, archive_size, context, client, bucket, version_id, request_settings); }, - basic_key_with_info_ptr->info.value().size); - } - else - { - archive_reader = nullptr; - } -} - -StorageS3Source::StorageS3Source( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - const ContextPtr & context_, - std::optional format_settings_, - UInt64 max_block_size_, - const 
S3Settings::RequestSettings & request_settings_, - String compression_hint_, - const std::shared_ptr & client_, - const String & bucket_, - const String & version_id_, - const String & url_host_and_port_, - std::shared_ptr file_iterator_, - const size_t max_parsing_threads_, - bool need_only_count_) - : SourceWithKeyCondition(info.source_header, false) - , WithContext(context_) - , name(std::move(name_)) - , bucket(bucket_) - , version_id(version_id_) - , url_host_and_port(url_host_and_port_) - , format(format_) - , columns_desc(info.columns_description) - , requested_columns(info.requested_columns) - , max_block_size(max_block_size_) - , request_settings(request_settings_) - , compression_hint(std::move(compression_hint_)) - , client(client_) - , sample_block(info.format_header) - , format_settings(format_settings_) - , requested_virtual_columns(info.requested_virtual_columns) - , file_iterator(file_iterator_) - , max_parsing_threads(max_parsing_threads_) - , need_only_count(need_only_count_) - , create_reader_pool( - CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "CreateS3Reader")) -{ -} - -void StorageS3Source::lazyInitialize(size_t idx) -{ - if (initialized) - return; - - reader = createReader(idx); - if (reader) - reader_future = createReaderAsync(idx); - initialized = true; -} - -StorageS3Source::ReaderHolder StorageS3Source::createReader(size_t idx) -{ - KeyWithInfoPtr key_with_info; - do - { - key_with_info = file_iterator->next(idx); - if (!key_with_info || key_with_info->key.empty()) - return {}; - - if (!key_with_info->info) - key_with_info->info = S3::getObjectInfo(*client, bucket, key_with_info->key, version_id, request_settings); - } - while (getContext()->getSettingsRef().s3_skip_empty_files && key_with_info->info->size == 0); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(*key_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. 
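The comment above explains why a cached row count is emitted as several chunks of at most max_block_size rows instead of one oversized chunk. A rough sketch of the splitting arithmetic, independent of ConstChunkGenerator itself:

```cpp
// Sketch: a known total row count is turned into a sequence of chunk sizes
// capped at max_block_size, rather than one huge chunk that might later be materialized.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

static std::vector<uint64_t> splitIntoChunks(uint64_t total_rows, uint64_t max_block_size)
{
    std::vector<uint64_t> chunk_sizes;
    while (total_rows > 0)
    {
        const uint64_t rows = std::min(total_rows, max_block_size);
        chunk_sizes.push_back(rows);
        total_rows -= rows;
    }
    return chunk_sizes;
}

int main()
{
    for (uint64_t rows : splitIntoChunks(/*total_rows=*/250000, /*max_block_size=*/65536))
        std::cout << rows << '\n';   // 65536, 65536, 65536, 53392
}
```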
- source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - auto compression_method = CompressionMethod::None; - if (!key_with_info->path_in_archive.has_value()) - { - compression_method = chooseCompressionMethod(key_with_info->key, compression_hint); - read_buf = createS3ReadBuffer( - key_with_info->key, key_with_info->info->size, getContext(), client, bucket, version_id, request_settings); - } - else - { - compression_method = chooseCompressionMethod(key_with_info->path_in_archive.value(), compression_hint); - read_buf = key_with_info->archive_reader->readFile(key_with_info->path_in_archive.value(), /*throw_on_not_found=*/true); - } - auto input_format = FormatFactory::instance().getInput( - format, - *read_buf, - sample_block, - getContext(), - max_block_size, - format_settings, - max_parsing_threads, - /* max_download_threads= */ std::nullopt, - /* is_remote_fs */ true, - compression_method, - need_only_count); - - if (key_condition) - input_format->setKeyCondition(key_condition); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{key_with_info, bucket, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -std::future StorageS3Source::createReaderAsync(size_t idx) -{ - return create_reader_scheduler([=, this] { return createReader(idx); }, Priority{}); -} - -std::unique_ptr createS3ReadBuffer( - const String & key, - size_t object_size, - std::shared_ptr context, - std::shared_ptr client_ptr, - const String & bucket, - const String & version_id, - const S3Settings::RequestSettings & request_settings) -{ - auto read_settings = context->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - auto download_buffer_size = context->getSettings().max_download_buffer_size; - const bool object_too_small = object_size <= 2 * download_buffer_size; - static LoggerPtr log = getLogger("StorageS3Source"); - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. 
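The heuristic described above prefers an eagerly prefetching reader for small objects and a plain seekable reader for large ones. A minimal sketch of that decision; the enum names and thresholds are illustrative, while the real code takes them from the read settings:

```cpp
// Sketch of the buffer-selection heuristic: small objects are prefetched up front,
// large ones are read through a plain seekable buffer suitable for parallel ranges.
#include <cstddef>
#include <iostream>

enum class ReadMethod { threadpool, read };
enum class BufferKind { async_prefetching, plain_seekable };

static BufferKind chooseBuffer(size_t object_size, size_t download_buffer_size, ReadMethod method)
{
    const bool object_too_small = object_size <= 2 * download_buffer_size;
    if (object_too_small && method == ReadMethod::threadpool)
        return BufferKind::async_prefetching;   // prefetch roughly the whole object
    return BufferKind::plain_seekable;          // big file: parallel range reads win
}

int main()
{
    std::cout << (chooseBuffer(512 * 1024, size_t(1) << 20, ReadMethod::threadpool)
                  == BufferKind::async_prefetching) << '\n';   // 1: small object
    std::cout << (chooseBuffer(size_t(1) << 30, size_t(1) << 20, ReadMethod::threadpool)
                  == BufferKind::async_prefetching) << '\n';   // 0: large object
}
```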
- if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - LOG_TRACE(log, "Downloading object of size {} from S3 with initial prefetch", object_size); - return createAsyncS3ReadBuffer(key, read_settings, object_size, context, client_ptr, bucket, version_id, request_settings); - } - - - return std::make_unique( - client_ptr, - bucket, - key, - version_id, - request_settings, - read_settings, - /*use_external_buffer*/ false, - /*offset_*/ 0, - /*read_until_position_*/ 0, - /*restricted_seek_*/ false, - object_size); -} - -std::unique_ptr createAsyncS3ReadBuffer( - const String & key, - const ReadSettings & read_settings, - size_t object_size, - std::shared_ptr context, - std::shared_ptr client_ptr, - const String & bucket, - const String & version_id, - const S3Settings::RequestSettings & request_settings) -{ - auto read_buffer_creator = [=](bool restricted_seek, const StoredObject & object) -> std::unique_ptr - { - return std::make_unique( - client_ptr, - bucket, - object.remote_path, - version_id, - request_settings, - read_settings, - /* use_external_buffer */ true, - /* offset */ 0, - /* read_until_position */ 0, - restricted_seek, - object_size); - }; - - auto modified_settings{read_settings}; - /// User's S3 object may change, don't cache it. - modified_settings.use_page_cache_for_disks_without_file_cache = false; - - /// FIXME: Changing this setting to default value breaks something around parquet reading - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - - auto s3_impl = std::make_unique( - std::move(read_buffer_creator), - StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, - "", - read_settings, - /* cache_log */ nullptr, - /* use_external_buffer */ true); - - auto & pool_reader = context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); - auto async_reader = std::make_unique( - std::move(s3_impl), pool_reader, modified_settings, context->getAsyncReadCounters(), context->getFilesystemReadPrefetchesLog()); - - async_reader->setReadUntilEnd(); - if (read_settings.remote_fs_prefetch) - async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - return async_reader; -} - -StorageS3Source::~StorageS3Source() -{ - create_reader_pool.wait(); -} - -String StorageS3Source::getName() const -{ - return name; -} - -Chunk StorageS3Source::generate() -{ - lazyInitialize(); - - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - String file_name = reader.getFile(); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, requested_virtual_columns, reader.getPath(), reader.getFileSize(), reader.isArchive() ? (&file_name) : nullptr); - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getPath(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. 
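`generate()` above keeps one reader in flight while the next is prepared asynchronously, joining the previous task before scheduling a new one. A hedged sketch of that pipelining idea, with hypothetical types and `std::async` standing in for the ClickHouse thread pool (the `get()` call plays the role of waiting on the pool before rescheduling):

```cpp
// Sketch: consume the current reader while the next one is created in the background,
// then switch to it and schedule creation of the one after that.
#include <future>
#include <optional>
#include <string>

struct Reader { std::string key; };

static std::optional<Reader> createReader(int index)
{
    if (index >= 3)
        return std::nullopt;                 // no more files
    return Reader{"file-" + std::to_string(index)};
}

int main()
{
    int index = 0;
    std::optional<Reader> reader = createReader(index++);
    auto next = std::async(std::launch::async, createReader, index++);

    while (reader)
    {
        /* ... pull all chunks from *reader ... */
        reader = next.get();                 // join the background task, switch readers
        if (reader)
            next = std::async(std::launch::async, createReader, index++);
    }
}
```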
- create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -void StorageS3Source::addNumRowsToCache(const String & bucket_with_key, size_t num_rows) -{ - String source = fs::path(url_host_and_port) / bucket_with_key; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional StorageS3Source::tryGetNumRowsFromCache(const KeyWithInfo & key_with_info) -{ - String source = fs::path(url_host_and_port) / bucket / key_with_info.key; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional { return key_with_info.info->last_modification_time; }; - - return StorageS3::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -class StorageS3Sink : public SinkToStorage -{ -public: - StorageS3Sink( - const String & format, - const Block & sample_block_, - const ContextPtr & context, - std::optional format_settings_, - const CompressionMethod compression_method, - const StorageS3::Configuration & configuration_, - const String & bucket, - const String & key) - : SinkToStorage(sample_block_), sample_block(sample_block_), format_settings(format_settings_) - { - BlobStorageLogWriterPtr blob_log = nullptr; - if (auto blob_storage_log = context->getBlobStorageLog()) - { - blob_log = std::make_shared(std::move(blob_storage_log)); - blob_log->query_id = context->getCurrentQueryId(); - } - - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - configuration_.client, - bucket, - key, - DBMS_DEFAULT_BUFFER_SIZE, - configuration_.request_settings, - std::move(blob_log), - std::nullopt, - threadPoolCallbackRunnerUnsafe(getIOThreadPool().get(), "S3ParallelWrite"), - context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer - = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); - } - - String getName() const override { return "StorageS3Sink"; } - - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf.reset(); - } - - Block sample_block; - std::optional format_settings; - std::unique_ptr write_buf; - OutputFormatPtr writer; - bool cancelled = false; - std::mutex cancel_mutex; -}; - -namespace -{ - -std::optional checkAndGetNewFileOnInsertIfNeeded( - const ContextPtr & context, const StorageS3::Configuration & configuration, const String & key, size_t sequence_number) -{ - if (context->getSettingsRef().s3_truncate_on_insert - || !S3::objectExists( - *configuration.client, configuration.url.bucket, key, configuration.url.version_id, configuration.request_settings)) - return std::nullopt; - - if (context->getSettingsRef().s3_create_new_file_on_insert) - { - auto pos = key.find_first_of('.'); - String new_key; - do - { - new_key = key.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : key.substr(pos)); - ++sequence_number; - } while (S3::objectExists( - *configuration.client, configuration.url.bucket, new_key, configuration.url.version_id, configuration.request_settings)); - - return new_key; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object in bucket {} with key {} already exists. " - "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", - configuration.url.bucket, key); -} -} - - -class PartitionedStorageS3Sink : public PartitionedSink, WithContext -{ -public: - PartitionedStorageS3Sink( - const ASTPtr & partition_by, - const String & format_, - const Block & sample_block_, - const ContextPtr & context_, - std::optional format_settings_, - const CompressionMethod compression_method_, - const StorageS3::Configuration & configuration_, - const String & bucket_, - const String & key_) - : PartitionedSink(partition_by, context_, sample_block_) - , WithContext(context_) - , format(format_) - , sample_block(sample_block_) - , compression_method(compression_method_) - , configuration(configuration_) - , bucket(bucket_) - , key(key_) - , format_settings(format_settings_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto partition_bucket = replaceWildcards(bucket, partition_id); - validateBucket(partition_bucket); - - auto partition_key = replaceWildcards(key, partition_id); - validateKey(partition_key); - - if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(getContext(), configuration, partition_key, /* sequence_number */ 1)) - partition_key = *new_key; - - return std::make_shared( - format, sample_block, getContext(), format_settings, compression_method, configuration, partition_bucket, partition_key); - } - -private: - const String format; - const Block sample_block; - const CompressionMethod compression_method; - const StorageS3::Configuration configuration; - const String bucket; - const String key; - const std::optional format_settings; - - static void validateBucket(const String & str) - { - S3::URI::validateBucket(str, {}); - - if (!DB::UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in bucket name"); - - validatePartitionKey(str, false); - } - - static void validateKey(const String & str) - { - /// See: - /// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html - /// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject - - if (str.empty() || str.size() > 1024) - throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect key length (not empty, max 1023 characters), got: {}", str.size()); - - if (!DB::UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in key"); - - validatePartitionKey(str, true); - } -}; - - -StorageS3::StorageS3( - const Configuration & configuration_, - const ContextPtr & context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , configuration(configuration_) - , name(configuration.url.storage_name) - , distributed_processing(distributed_processing_) - , format_settings(format_settings_) - , partition_by(partition_by_) -{ - updateConfiguration(context_); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) - - if (configuration.format != "auto") - FormatFactory::instance().checkFormatName(configuration.format); - context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri); - context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration.headers_from_ast); - - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - ColumnsDescription columns; - if (configuration.format == "auto") - std::tie(columns, configuration.format) = getTableStructureAndFormatFromData(configuration, format_settings, context_); - else - columns = getTableStructureFromData(configuration, format_settings, context_); - - storage_metadata.setColumns(columns); - } - else - { - if (configuration.format == "auto") - configuration.format = getTableStructureAndFormatFromData(configuration, format_settings, context_).second; - - /// We don't allow special columns in S3 storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, "Table engine S3 doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -static std::shared_ptr createFileIterator( - StorageS3::Configuration configuration, - bool distributed_processing, - ContextPtr local_context, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns, - StorageS3Source::KeysWithInfo * read_keys = nullptr, - std::function file_progress_callback = {}) -{ - if (distributed_processing) - { - return std::make_shared( - local_context->getReadTaskCallback(), local_context->getSettingsRef().max_threads); - } - else - { - auto basic_iterator = [&]() -> std::unique_ptr - { - StorageS3Source::KeysWithInfo * local_read_keys = configuration.url.archive_pattern.has_value() ? 
nullptr : read_keys; - if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - return std::make_unique( - *configuration.client, - configuration.url, - predicate, - virtual_columns, - local_context, - local_read_keys, - configuration.request_settings, - file_progress_callback); - } - else - { - Strings keys = configuration.keys; - auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - if (filter_dag) - { - std::vector paths; - paths.reserve(keys.size()); - for (const auto & key : keys) - paths.push_back(fs::path(configuration.url.bucket) / key); - VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context); - } - return std::make_unique( - *configuration.client, - configuration.url.version_id, - keys, - configuration.url.bucket, - configuration.request_settings, - local_read_keys, - file_progress_callback); - } - }(); - if (configuration.url.archive_pattern.has_value()) - { - return std::make_shared( - std::move(basic_iterator), - configuration.url.archive_pattern.value(), - configuration.client, - configuration.url.bucket, - configuration.url.version_id, - configuration.request_settings, - local_context, - read_keys); - } - else - { - return basic_iterator; - } - } -} - -bool StorageS3::supportsSubsetOfColumns(const ContextPtr & context) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getFormatCopy(), context, format_settings); -} - -bool StorageS3::prefersLargeBlocks() const -{ - return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(getFormatCopy()); -} - -bool StorageS3::parallelizeOutputAfterReading(ContextPtr context) const -{ - return FormatFactory::instance().checkParallelizeOutputAfterReading(getFormatCopy(), context); -} - -void StorageS3::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr local_context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - updateConfiguration(local_context); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); - - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && local_context->getSettingsRef().optimize_count_from_files; - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - local_context, - read_from_format_info.source_header, - *this, - std::move(read_from_format_info), - need_only_count, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromStorageS3Step::applyFilters(ActionDAGNodes added_filter_nodes) -{ - SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); - - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - createIterator(predicate); -} - -void ReadFromStorageS3Step::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - iterator_wrapper = createFileIterator( - storage.getConfigurationCopy(), - storage.distributed_processing, - context, - predicate, - virtual_columns, - nullptr, - context->getFileProgressCallback()); -} - -void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - if (storage.partition_by && 
query_configuration.withPartitionWildcard()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned S3 storage is not implemented yet"); - - createIterator(nullptr); - size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount(); - if (estimated_keys_count > 1) - num_streams = std::min(num_streams, estimated_keys_count); - else - { - /// The amount of keys (zero) was probably underestimated. We will keep one stream for this particular case. - num_streams = 1; - } - - const size_t max_threads = context->getSettingsRef().max_threads; - const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul)); - - Pipes pipes; - pipes.reserve(num_streams); - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared( - read_from_format_info, - query_configuration.format, - storage.getName(), - context, - storage.format_settings, - max_block_size, - query_configuration.request_settings, - query_configuration.compression_method, - query_configuration.client, - query_configuration.url.bucket, - query_configuration.url.version_id, - query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()), - iterator_wrapper, - max_parsing_threads, - need_only_count); - - source->setKeyCondition(filter_actions_dag, context); - pipes.emplace_back(std::move(source)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(read_from_format_info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageS3::write( - const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) -{ - auto query_configuration = updateConfigurationAndGetCopy(local_context); - auto key = query_configuration.keys.front(); - - if (query_configuration.withGlobsIgnorePartitionWildcard()) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", query_configuration.url.key); - - auto sample_block = metadata_snapshot->getSampleBlock(); - auto chosen_compression_method = chooseCompressionMethod(query_configuration.keys.back(), query_configuration.compression_method); - auto insert_query = std::dynamic_pointer_cast(query); - - auto partition_by_ast = insert_query ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && query_configuration.withPartitionWildcard(); - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - query_configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - query_configuration, - query_configuration.url.bucket, - key); - } - else - { - if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(local_context, query_configuration, query_configuration.keys.front(), query_configuration.keys.size())) - { - std::lock_guard lock{configuration_update_mutex}; - query_configuration.keys.push_back(*new_key); - configuration.keys.push_back(*new_key); - key = *new_key; - } - - return std::make_shared( - query_configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - query_configuration, - query_configuration.url.bucket, - key); - } -} - -void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - auto query_configuration = updateConfigurationAndGetCopy(local_context); - - if (query_configuration.withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", - query_configuration.url.key); - } - - Aws::S3::Model::Delete delkeys; - - for (const auto & key : query_configuration.keys) - { - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(key); - delkeys.AddObjects(std::move(obj)); - } - - ProfileEvents::increment(ProfileEvents::S3DeleteObjects); - S3::DeleteObjectsRequest request; - request.SetBucket(query_configuration.url.bucket); - request.SetDelete(delkeys); - - auto response = query_configuration.client->DeleteObjects(request); - - const auto * response_error = response.IsSuccess() ? 
nullptr : &response.GetError(); - auto time_now = std::chrono::system_clock::now(); - if (auto blob_storage_log = BlobStorageLogWriter::create()) - for (const auto & key : query_configuration.keys) - blob_storage_log->addEvent( - BlobStorageLogElement::EventType::Delete, query_configuration.url.bucket, key, {}, 0, response_error, time_now); - - if (!response.IsSuccess()) - { - const auto & err = response.GetError(); - throw S3Exception(err.GetMessage(), err.GetErrorType()); - } - - for (const auto & error : response.GetResult().GetErrors()) - LOG_WARNING(getLogger("StorageS3"), "Failed to delete {}, error: {}", error.GetKey(), error.GetMessage()); -} - -StorageS3::Configuration StorageS3::updateConfigurationAndGetCopy(const ContextPtr & local_context) -{ - std::lock_guard lock(configuration_update_mutex); - configuration.update(local_context); - return configuration; -} - -void StorageS3::updateConfiguration(const ContextPtr & local_context) -{ - std::lock_guard lock(configuration_update_mutex); - configuration.update(local_context); -} - -void StorageS3::useConfiguration(const StorageS3::Configuration & new_configuration) -{ - std::lock_guard lock(configuration_update_mutex); - configuration = new_configuration; -} - -StorageS3::Configuration StorageS3::getConfigurationCopy() const -{ - std::lock_guard lock(configuration_update_mutex); - return configuration; -} - -String StorageS3::getFormatCopy() const -{ - std::lock_guard lock(configuration_update_mutex); - return configuration.format; -} - -bool StorageS3::Configuration::update(const ContextPtr & context) -{ - auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName()); - request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context->getSettings()); - - if (client && (static_configuration || !auth_settings.hasUpdates(s3_settings.auth_settings))) - return false; - - auth_settings.updateFrom(s3_settings.auth_settings); - keys[0] = url.key; - connect(context); - return true; -} - -void StorageS3::Configuration::connect(const ContextPtr & context) -{ - const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); - const Settings & local_settings = context->getSettingsRef(); - - if (S3::isS3ExpressEndpoint(url.endpoint) && auth_settings.region.empty()) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets"); - - S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( - auth_settings.region, - context->getRemoteHostFilter(), - static_cast(global_settings.s3_max_redirects), - static_cast(global_settings.s3_retry_attempts), - global_settings.enable_s3_requests_logging, - /* for_disk_s3 = */ false, - request_settings.get_request_throttler, - request_settings.put_request_throttler, - url.uri.getScheme()); - - client_configuration.endpointOverride = url.endpoint; - /// seems as we don't use it - client_configuration.maxConnections = static_cast(request_settings.max_connections); - client_configuration.connectTimeoutMs = local_settings.s3_connect_timeout_ms; - client_configuration.http_keep_alive_timeout = S3::DEFAULT_KEEP_ALIVE_TIMEOUT; - client_configuration.http_keep_alive_max_requests = S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS; - - auto headers = auth_settings.headers; - if (!headers_from_ast.empty()) - headers.insert(headers.end(), headers_from_ast.begin(), headers_from_ast.end()); - - client_configuration.requestTimeoutMs = 
request_settings.request_timeout_ms; - - S3::ClientSettings client_settings{ - .use_virtual_addressing = url.is_virtual_hosted_style, - .disable_checksum = local_settings.s3_disable_checksum, - .gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false), - .is_s3express_bucket = S3::isS3ExpressEndpoint(url.endpoint), - }; - - auto credentials - = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token); - client = S3::ClientFactory::instance().create( - client_configuration, - client_settings, - credentials.GetAWSAccessKeyId(), - credentials.GetAWSSecretKey(), - auth_settings.server_side_encryption_customer_key_base64, - auth_settings.server_side_encryption_kms_config, - std::move(headers), - S3::CredentialsConfiguration{ - auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)), - auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), - auth_settings.expiration_window_seconds.value_or( - context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), - auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), - }, - credentials.GetSessionToken()); -} - -bool StorageS3::Configuration::withGlobsIgnorePartitionWildcard() const -{ - if (!withPartitionWildcard()) - return withGlobs(); - - return PartitionedSink::replaceWildcards(getPath(), "").find_first_of("*?{") != std::string::npos; -} - -void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); - - auto filename = collection.getOrDefault("filename", ""); - if (!filename.empty()) - configuration.url = S3::URI(std::filesystem::path(collection.get("url")) / filename); - else - configuration.url = S3::URI(collection.get("url")); - - configuration.auth_settings.access_key_id = collection.getOrDefault("access_key_id", ""); - configuration.auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", ""); - configuration.auth_settings.use_environment_credentials = collection.getOrDefault("use_environment_credentials", 1); - configuration.auth_settings.no_sign_request = collection.getOrDefault("no_sign_request", false); - configuration.auth_settings.expiration_window_seconds - = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS); - - configuration.format = collection.getOrDefault("format", configuration.format); - configuration.compression_method - = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); - configuration.structure = collection.getOrDefault("structure", "auto"); - - configuration.request_settings = S3Settings::RequestSettings(collection); -} - -StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file) -{ - StorageS3::Configuration configuration; - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - processNamedCollectionResult(configuration, *named_collection); - } - else - { - /// Supported signatures: - /// - /// S3('url') - /// S3('url', 'format') - /// S3('url', 'format', 'compression') - /// S3('url', NOSIGN) - /// S3('url', NOSIGN, 
'format') - /// S3('url', NOSIGN, 'format', 'compression') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression') - /// with optional headers() function - - size_t count = StorageURL::evalArgsAndCollectHeaders(engine_args, configuration.headers_from_ast, local_context); - - if (count == 0 || count > 6) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage S3 requires 1 to 6 positional arguments: " - "url, [NOSIGN | access_key_id, secret_access_key], [session_token], [name of used format], [compression_method], [headers], [extra_credentials]"); - - std::unordered_map engine_args_to_idx; - bool no_sign_request = false; - - /// For 2 arguments we support 2 possible variants: - /// - s3(source, format) - /// - s3(source, NOSIGN) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - if (count == 2) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - no_sign_request = true; - else - engine_args_to_idx = {{"format", 1}}; - } - /// For 3 arguments we support 2 possible variants: - /// - s3(source, format, compression_method) - /// - s3(source, access_key_id, secret_access_key) - /// - s3(source, NOSIGN, format) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or format name. - else if (count == 3) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - engine_args_to_idx = {{"format", 2}}; - } - else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) - engine_args_to_idx = {{"format", 1}, {"compression_method", 2}}; - else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; - } - /// For 4 arguments we support 3 possible variants: - /// - s3(source, access_key_id, secret_access_key, session_token) - /// - s3(source, access_key_id, secret_access_key, format) - /// - s3(source, NOSIGN, format, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN or not. 
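The comments above enumerate the supported positional signatures; disambiguation hinges on the argument count and on whether the second argument is `NOSIGN` or a known format name. A reduced sketch of the 2- and 3-argument branches, with a hard-coded `isKnownFormat` standing in for the `FormatFactory` lookup, might look like this:

```cpp
#include <cctype>
#include <iostream>
#include <map>
#include <string>
#include <vector>

/// Stand-in for FormatFactory::instance().exists(): a tiny hard-coded list for illustration only.
bool isKnownFormat(const std::string & name)
{
    return name == "auto" || name == "CSV" || name == "Parquet" || name == "JSONEachRow";
}

bool equalsIgnoreCase(const std::string & a, const std::string & b)
{
    if (a.size() != b.size())
        return false;
    for (size_t i = 0; i < a.size(); ++i)
        if (std::tolower(static_cast<unsigned char>(a[i])) != std::tolower(static_cast<unsigned char>(b[i])))
            return false;
    return true;
}

/// Maps argument names ("format", "access_key_id", ...) to positional indexes, mimicking
/// the 2- and 3-argument branches of the argument-resolution logic above.
std::map<std::string, size_t> resolveArgs(const std::vector<std::string> & args, bool & no_sign_request)
{
    no_sign_request = false;
    if (args.size() == 2)
    {
        if (equalsIgnoreCase(args[1], "NOSIGN"))
            no_sign_request = true;
        else
            return {{"format", 1}};
    }
    else if (args.size() == 3)
    {
        if (equalsIgnoreCase(args[1], "NOSIGN"))
        {
            no_sign_request = true;
            return {{"format", 2}};
        }
        if (isKnownFormat(args[1]))
            return {{"format", 1}, {"compression_method", 2}};
        return {{"access_key_id", 1}, {"secret_access_key", 2}};
    }
    return {};
}

int main()
{
    bool no_sign = false;
    auto mapping = resolveArgs({"https://bucket.s3.amazonaws.com/data.csv", "CSV", "gzip"}, no_sign);
    for (const auto & [name, idx] : mapping)
        std::cout << name << " -> arg #" << idx << '\n';
}
```

The 4-, 5- and 6-argument branches that follow apply the same idea, additionally inspecting the fourth argument to tell `session_token` apart from `format`.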
- else if (count == 4) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - engine_args_to_idx = {{"format", 2}, {"compression_method", 3}}; - } - else - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; - else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; - } - } - /// For 5 arguments we support 2 possible variants: - /// - s3(source, access_key_id, secret_access_key, session_token, format) - /// - s3(source, access_key_id, secret_access_key, format, compression) - else if (count == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}}; - else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; - } - else if (count == 6) - { - engine_args_to_idx - = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}; - } - - /// This argument is always the first - configuration.url = S3::URI(checkAndGetLiteralArgument(engine_args[0], "url")); - - if (engine_args_to_idx.contains("format")) - configuration.format = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["format"]], "format"); - - if (engine_args_to_idx.contains("compression_method")) - configuration.compression_method - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["compression_method"]], "compression_method"); - - if (engine_args_to_idx.contains("access_key_id")) - configuration.auth_settings.access_key_id - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["access_key_id"]], "access_key_id"); - - if (engine_args_to_idx.contains("secret_access_key")) - configuration.auth_settings.secret_access_key - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); - - if (engine_args_to_idx.contains("session_token")) - configuration.auth_settings.session_token - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["session_token"]], "session_token"); - - if (no_sign_request) - configuration.auth_settings.no_sign_request = no_sign_request; - } - - configuration.static_configuration - = !configuration.auth_settings.access_key_id.empty() || configuration.auth_settings.no_sign_request.has_value(); - - configuration.keys = {configuration.url.key}; - - if (configuration.format == "auto" && get_format_from_file) - { - if (configuration.url.archive_pattern.has_value()) - { - configuration.format = FormatFactory::instance() - .tryGetFormatFromFileName(Poco::URI(configuration.url.archive_pattern.value()).getPath()) - .value_or("auto"); - } - else - { - configuration.format - = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(configuration.url.uri_str).getPath()).value_or("auto"); - } - } - - return configuration; -} - -ColumnsDescription StorageS3::getTableStructureFromData( - const StorageS3::Configuration & configuration_, const std::optional & format_settings_, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(configuration_.format, 
configuration_, format_settings_, ctx).first; -} - -std::pair StorageS3::getTableStructureAndFormatFromData( - const StorageS3::Configuration & configuration, const std::optional & format_settings, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, configuration, format_settings, ctx); -} - -class ReadBufferIterator : public IReadBufferIterator, WithContext -{ -public: - ReadBufferIterator( - std::shared_ptr file_iterator_, - const StorageS3Source::KeysWithInfo & read_keys_, - const StorageS3::Configuration & configuration_, - std::optional format_, - const std::optional & format_settings_, - ContextPtr context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , read_keys(read_keys_) - , configuration(configuration_) - , format(std::move(format_)) - , format_settings(format_settings_) - , prev_read_keys_size(read_keys_.size()) - { - } - - Data next() override - { - if (first) - { - /// If format is unknown we iterate through all currently read keys on first iteration and - /// try to determine format by file name. - if (!format) - { - for (const auto & key_with_info : read_keys) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(key_with_info->getFileName())) - { - format = format_from_file_name; - break; - } - } - } - - /// For default mode check cached columns for currently read keys on first iteration. - if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns, format}; - } - } - - while (true) - { - current_key_with_info = (*file_iterator)(); - - if (!current_key_with_info || current_key_with_info->key.empty()) - { - if (first) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files with provided path " - "in S3 or all files are empty. You can specify table structure manually", - *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in S3 or all files are empty. You can specify the format manually"); - } - - return {nullptr, std::nullopt, format}; - } - - if (read_keys.size() > prev_read_keys_size) - { - /// If format is unknown we can try to determine it by new file names. - if (!format) - { - for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName())) - { - format = format_from_file_name; - break; - } - } - } - - /// Check new files in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - if (columns_from_cache) - return {nullptr, columns_from_cache, format}; - } - - prev_read_keys_size = read_keys.size(); - } - - if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info->info && current_key_with_info->info->size == 0) - continue; - - /// In union mode, check cached columns only for current key. 
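As the deleted `ReadBufferIterator::next()` shows, schema inference first tries to derive the input format from the file name and only then falls back to content-based detection. Roughly, the file-name step is an extension lookup; the mapping below is a made-up subset for illustration, not the real `FormatFactory` registry:

```cpp
#include <iostream>
#include <map>
#include <optional>
#include <string>

/// Illustrative subset of extension -> format mappings; the real lookup is done by FormatFactory.
std::optional<std::string> tryGetFormatFromFileName(const std::string & file_name)
{
    static const std::map<std::string, std::string> by_extension = {
        {"csv", "CSV"}, {"tsv", "TabSeparated"}, {"parquet", "Parquet"}, {"json", "JSONEachRow"}};

    auto dot = file_name.find_last_of('.');
    if (dot == std::string::npos)
        return std::nullopt;

    auto it = by_extension.find(file_name.substr(dot + 1));
    if (it == by_extension.end())
        return std::nullopt;
    return it->second;
}

int main()
{
    for (const std::string & key : {"data/part-0001.parquet", "logs/2024-05-01.csv", "raw/blob.bin"})
    {
        if (auto format = tryGetFormatFromFileName(key))
            std::cout << key << " -> " << *format << '\n';
        else
            std::cout << key << " -> format must be detected from the content\n";
    }
}
```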
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - StorageS3Source::KeysWithInfo keys = {current_key_with_info}; - if (auto columns_from_cache = tryGetColumnsFromCache(keys.begin(), keys.end())) - { - first = false; - return {nullptr, columns_from_cache, format}; - } - } - - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - std::unique_ptr impl; - - if (!current_key_with_info->path_in_archive.has_value()) - { - impl = std::make_unique( - configuration.client, - configuration.url.bucket, - current_key_with_info->key, - configuration.url.version_id, - configuration.request_settings, - getContext()->getReadSettings()); - } - else - { - assert(current_key_with_info->archive_reader); - impl = current_key_with_info->archive_reader->readFile( - current_key_with_info->path_in_archive.value(), /*throw_on_not_found=*/true); - } - if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof()) - { - first = false; - return { - wrapReadBufferWithCompressionMethod( - std::move(impl), - current_key_with_info->path_in_archive.has_value() - ? chooseCompressionMethod(current_key_with_info->path_in_archive.value(), configuration.compression_method) - : chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), - zstd_window_log_max), - std::nullopt, - format}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) - / configuration.url.bucket / current_key_with_info->getPath(); - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) - / configuration.url.bucket / current_key_with_info->getPath(); - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addColumns(cache_key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket; - Strings sources; - sources.reserve(read_keys.size()); - std::transform( - read_keys.begin(), - read_keys.end(), - std::back_inserter(sources), - [&](const auto & elem) { return host_and_bucket / elem->getPath(); }); - auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override - { - if (current_key_with_info) - return current_key_with_info->getPath(); - return ""; - } - - bool supportsLastReadBufferRecreation() const override { 
return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - chassert(current_key_with_info); - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - auto impl = std::make_unique(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings()); - return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max); - } - -private: - std::optional tryGetColumnsFromCache( - const StorageS3Source::KeysWithInfo::const_iterator & begin, const StorageS3Source::KeysWithInfo::const_iterator & end) - { - auto context = getContext(); - if (!context->getSettingsRef().schema_inference_use_cache_for_s3) - return std::nullopt; - - auto & schema_cache = StorageS3::getSchemaCache(context); - for (auto it = begin; it < end; ++it) - { - auto get_last_mod_time = [&] - { - time_t last_modification_time = 0; - if ((*it)->info) - { - last_modification_time = (*it)->info->last_modification_time; - } - else - { - /// Note that in case of exception in getObjectInfo returned info will be empty, - /// but schema cache will handle this case and won't return columns from cache - /// because we can't say that it's valid without last modification time. - last_modification_time = S3::getObjectInfo( - *configuration.client, - configuration.url.bucket, - (*it)->key, - configuration.url.version_id, - configuration.request_settings, - /*with_metadata=*/ false, - /*throw_on_error= */ false).last_modification_time; - } - - return last_modification_time ? std::make_optional(last_modification_time) : std::nullopt; - }; - String path = fs::path(configuration.url.bucket) / (*it)->getPath(); - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path; - - if (format) - { - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry fcreateor some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. 
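`tryGetColumnsFromCache()` above returns a cached schema only when the object's last modification time can be obtained and the cache entry is still valid; when the format is unknown, it additionally probes the cache once per input format. A toy version of the validity check (a plain `std::map` standing in for `StorageS3::getSchemaCache()`) could be:

```cpp
#include <ctime>
#include <iostream>
#include <map>
#include <optional>
#include <string>

struct CacheEntry
{
    std::string columns;        /// stand-in for ColumnsDescription
    std::time_t cached_at = 0;  /// when the schema was inferred
};

/// Toy schema cache lookup: trust the cached columns only if the object's modification time
/// is known and the object has not changed since the schema was cached.
std::optional<std::string> tryGetColumns(
    const std::map<std::string, CacheEntry> & cache,
    const std::string & source,
    std::optional<std::time_t> last_modification_time)
{
    auto it = cache.find(source);
    if (it == cache.end())
        return std::nullopt;
    if (!last_modification_time)                          /// unknown mtime: cannot prove validity
        return std::nullopt;
    if (*last_modification_time > it->second.cached_at)   /// object changed after caching
        return std::nullopt;
    return it->second.columns;
}

int main()
{
    std::map<std::string, CacheEntry> cache;
    cache["host:9000/bucket/data.csv"] = {"a UInt64, b String", /*cached_at=*/ 1700000000};

    auto columns = tryGetColumns(cache, "host:9000/bucket/data.csv", 1690000000);
    std::cout << (columns ? *columns : std::string("cache miss")) << '\n';
}
```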
- format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - std::shared_ptr file_iterator; - const StorageS3Source::KeysWithInfo & read_keys; - const StorageS3::Configuration & configuration; - std::optional format; - const std::optional & format_settings; - StorageS3Source::KeyWithInfoPtr current_key_with_info; - size_t prev_read_keys_size; - bool first = true; -}; - -std::pair StorageS3::getTableStructureAndFormatFromDataImpl( - std::optional format, - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx) -{ - KeysWithInfo read_keys; - - auto file_iterator = createFileIterator(configuration, false, ctx, {}, {}, &read_keys); - - ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format, format_settings, ctx); - if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, ctx); -} - -void registerStorageS3Impl(const String & name, StorageFactory & factory) -{ - factory.registerStorage(name, [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = StorageS3::getConfiguration(engine_args, args.getLocalContext()); - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. - std::optional format_settings; - if (args.storage_def->settings) - { - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - } - - // Apply changes from SETTINGS clause, with validation. 
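`registerStorageS3Impl()` above builds the format settings in two layers: changed settings from the global context are applied first, skipping names the format-settings object does not know, and the CREATE query's `SETTINGS` clause is applied on top with validation. A reduced sketch of that merge, using a plain map in place of `FormatFactorySettings`, might be:

```cpp
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

using SettingChanges = std::vector<std::pair<std::string, std::string>>;

/// Toy stand-in for FormatFactorySettings: a fixed set of known format settings with defaults.
struct FormatSettings
{
    std::map<std::string, std::string> values
        = {{"format_csv_delimiter", ","}, {"output_format_json_quote_64bit_integers", "1"}};

    bool has(const std::string & name) const { return values.count(name) != 0; }
    void set(const std::string & name, const std::string & value) { values[name] = value; }

    /// Apply changes from the SETTINGS clause; unlike the global layer, unknown names are an error.
    void applyChanges(const SettingChanges & changes)
    {
        for (const auto & [name, value] : changes)
        {
            if (!has(name))
                throw std::runtime_error("Unknown format setting: " + name);
            set(name, value);
        }
    }
};

int main()
{
    FormatSettings settings;

    /// Layer 1: changed settings from the global/session context; unknown names are ignored here.
    SettingChanges session_changes = {{"format_csv_delimiter", ";"}, {"max_threads", "8"}};
    for (const auto & [name, value] : session_changes)
        if (settings.has(name))
            settings.set(name, value);

    /// Layer 2: the CREATE query's SETTINGS clause, applied with validation.
    settings.applyChanges({{"output_format_json_quote_64bit_integers", "0"}});

    for (const auto & [name, value] : settings.values)
        std::cout << name << " = " << value << '\n';
}
```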
- user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - std::move(configuration), - args.getContext(), - args.table_id, - args.columns, - args.constraints, - args.comment, - format_settings, - /* distributed_processing_ */false, - partition_by); - }, - { - .supports_settings = true, - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::S3, - }); -} - -void registerStorageS3(StorageFactory & factory) -{ - registerStorageS3Impl("S3", factory); - registerStorageS3Impl("COSN", factory); - registerStorageS3Impl("OSS", factory); -} - -bool StorageS3::supportsPartitionBy() const -{ - return true; -} - -SchemaCache & StorageS3::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_s3", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} -} - -#endif diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h deleted file mode 100644 index 606c677f915..00000000000 --- a/src/Storages/StorageS3.h +++ /dev/null @@ -1,462 +0,0 @@ -#pragma once - -#include -#include -#include "IO/Archives/IArchiveReader.h" -#include "IO/Archives/createArchiveReader.h" -#include "IO/ReadBuffer.h" -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int LOGICAL_ERROR; -} - -class PullingPipelineExecutor; -class NamedCollection; - -class StorageS3Source : public SourceWithKeyCondition, WithContext -{ -public: - struct KeyWithInfo - { - KeyWithInfo() = default; - - explicit KeyWithInfo( - String key_, - std::optional info_ = std::nullopt, - std::optional path_in_archive_ = std::nullopt, - std::shared_ptr archive_reader_ = nullptr) - : key(std::move(key_)) - , info(std::move(info_)) - , path_in_archive(std::move(path_in_archive_)) - , archive_reader(std::move(archive_reader_)) - { - if (path_in_archive.has_value() != (archive_reader != nullptr)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Archive reader and path in archive must exist simultaneously"); - } - - virtual ~KeyWithInfo() = default; - - String key; - std::optional info; - std::optional path_in_archive; - std::shared_ptr archive_reader; - - String getPath() const { return path_in_archive.has_value() ? (key + "::" + path_in_archive.value()) : key; } - String getFileName() const { return path_in_archive.has_value() ? path_in_archive.value() : key; } - }; - - using KeyWithInfoPtr = std::shared_ptr; - - using KeysWithInfo = std::vector; - class IIterator - { - public: - virtual ~IIterator() = default; - virtual KeyWithInfoPtr next(size_t idx = 0) = 0; /// NOLINT - - /// Estimates how many streams we need to process all files. - /// If keys count >= max_threads_count, the returned number may not represent the actual number of the keys. - /// Intended to be called before any next() calls, may underestimate otherwise - /// fixme: May underestimate if the glob has a strong filter, so there are few matches among the first 1000 ListObjects results. 
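`estimatedKeysCount()` exists so that `initializePipeline()` (earlier in this diff) can avoid spawning more streams than there are keys and can split the parsing threads among the streams. The clamping itself is small; a minimal sketch with made-up numbers:

```cpp
#include <algorithm>
#include <iostream>

int main()
{
    size_t num_streams = 16;          /// e.g. max_threads from the query settings
    size_t estimated_keys_count = 3;  /// what the iterator reports before listing has finished

    if (estimated_keys_count > 1)
        num_streams = std::min(num_streams, estimated_keys_count);
    else
        num_streams = 1;              /// zero/one keys: keep a single stream, the estimate may be low

    /// Each stream gets a share of the parsing threads, as in the deleted initializePipeline().
    const size_t max_threads = 16;
    const size_t max_parsing_threads
        = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, size_t(1)));

    std::cout << "streams: " << num_streams << ", parsing threads per stream: " << max_parsing_threads << '\n';
}
```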
- virtual size_t estimatedKeysCount() = 0; - - KeyWithInfoPtr operator()() { return next(); } - }; - - class DisclosedGlobIterator : public IIterator - { - public: - DisclosedGlobIterator( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns, - const ContextPtr & context, - KeysWithInfo * read_keys_ = nullptr, - const S3Settings::RequestSettings & request_settings_ = {}, - std::function progress_callback_ = {}); - - KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT - size_t estimatedKeysCount() override; - - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - class KeysIterator : public IIterator - { - public: - explicit KeysIterator( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys = nullptr, - std::function progress_callback_ = {}); - - KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT - size_t estimatedKeysCount() override; - - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - class ReadTaskIterator : public IIterator - { - public: - explicit ReadTaskIterator(const ReadTaskCallback & callback_, size_t max_threads_count); - - KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT - size_t estimatedKeysCount() override; - - private: - KeysWithInfo buffer; - std::atomic_size_t index = 0; - - ReadTaskCallback callback; - }; - - class ArchiveIterator : public IIterator, public WithContext - { - public: - explicit ArchiveIterator( - std::unique_ptr basic_iterator_, - const std::string & archive_pattern_, - std::shared_ptr client_, - const String & bucket_, - const String & version_id_, - const S3Settings::RequestSettings & request_settings, - ContextPtr context_, - KeysWithInfo * read_keys_); - - KeyWithInfoPtr next(size_t) override; /// NOLINT - size_t estimatedKeysCount() override; - void refreshArchiveReader(); - - private: - std::unique_ptr basic_iterator; - KeyWithInfoPtr basic_key_with_info_ptr; - std::unique_ptr basic_read_buffer; - std::shared_ptr archive_reader{nullptr}; - std::unique_ptr file_enumerator = nullptr; - std::string path_in_archive = {}; // used when reading a single file from archive - IArchiveReader::NameFilter filter = {}; // used when files inside archive are defined with a glob - std::shared_ptr client; - const String bucket; - const String version_id; - S3Settings::RequestSettings request_settings; - std::mutex take_next_mutex; - KeysWithInfo * read_keys; - }; - - friend StorageS3Source::ArchiveIterator; - - StorageS3Source( - const ReadFromFormatInfo & info, - const String & format, - String name_, - const ContextPtr & context_, - std::optional format_settings_, - UInt64 max_block_size_, - const S3Settings::RequestSettings & request_settings_, - String compression_hint_, - const std::shared_ptr & client_, - const String & bucket, - const String & version_id, - const String & url_host_and_port, - std::shared_ptr file_iterator_, - size_t max_parsing_threads, - bool need_only_count_); - - ~StorageS3Source() override; - - String getName() const override; - - void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override - { - setKeyConditionImpl(filter_actions_dag, context_, sample_block); - } - - Chunk generate() override; - -private: - friend class StorageS3QueueSource; - - String 
name; - String bucket; - String version_id; - String url_host_and_port; - String format; - ColumnsDescription columns_desc; - NamesAndTypesList requested_columns; - UInt64 max_block_size; - S3Settings::RequestSettings request_settings; - String compression_hint; - std::shared_ptr client; - Block sample_block; - std::optional format_settings; - - struct ReaderHolder - { - public: - ReaderHolder( - KeyWithInfoPtr key_with_info_, - String bucket_, - std::unique_ptr read_buf_, - std::shared_ptr source_, - std::unique_ptr pipeline_, - std::unique_ptr reader_) - : key_with_info(key_with_info_) - , bucket(std::move(bucket_)) - , read_buf(std::move(read_buf_)) - , source(std::move(source_)) - , pipeline(std::move(pipeline_)) - , reader(std::move(reader_)) - { - } - - ReaderHolder() = default; - ReaderHolder(const ReaderHolder & other) = delete; - ReaderHolder & operator=(const ReaderHolder & other) = delete; - - ReaderHolder(ReaderHolder && other) noexcept { *this = std::move(other); } - - ReaderHolder & operator=(ReaderHolder && other) noexcept - { - /// The order of destruction is important. - /// reader uses pipeline, pipeline uses read_buf. - reader = std::move(other.reader); - pipeline = std::move(other.pipeline); - source = std::move(other.source); - read_buf = std::move(other.read_buf); - key_with_info = std::move(other.key_with_info); - bucket = std::move(other.bucket); - return *this; - } - - explicit operator bool() const { return reader != nullptr; } - PullingPipelineExecutor * operator->() { return reader.get(); } - const PullingPipelineExecutor * operator->() const { return reader.get(); } - String getPath() const { return bucket + "/" + key_with_info->getPath(); } - String getFile() const { return key_with_info->getFileName(); } - bool isArchive() { return key_with_info->path_in_archive.has_value(); } - const KeyWithInfo & getKeyWithInfo() const { return *key_with_info; } - std::optional getFileSize() const { return key_with_info->info ? std::optional(key_with_info->info->size) : std::nullopt; } - - const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } - - private: - KeyWithInfoPtr key_with_info; - String bucket; - std::unique_ptr read_buf; - std::shared_ptr source; - std::unique_ptr pipeline; - std::unique_ptr reader; - }; - - ReaderHolder reader; - - NamesAndTypesList requested_virtual_columns; - std::shared_ptr file_iterator; - size_t max_parsing_threads = 1; - bool need_only_count; - - LoggerPtr log = getLogger("StorageS3Source"); - - ThreadPool create_reader_pool; - ThreadPoolCallbackRunnerUnsafe create_reader_scheduler; - std::future reader_future; - std::atomic initialized{false}; - - size_t total_rows_in_file = 0; - - /// Notice: we should initialize reader and future_reader lazily in generate to make sure key_condition - /// is set before createReader is invoked for key_condition is read in createReader. - void lazyInitialize(size_t idx = 0); - - /// Recreate ReadBuffer and Pipeline for each file. - ReaderHolder createReader(size_t idx = 0); - std::future createReaderAsync(size_t idx = 0); - - void addNumRowsToCache(const String & bucket_with_key, size_t num_rows); - std::optional tryGetNumRowsFromCache(const KeyWithInfo & key_with_info); -}; - -/** - * This class represents table engine for external S3 urls. - * It sends HTTP GET to server when select is called and - * HTTP PUT when insert is called. 
- */ -class StorageS3 : public IStorage -{ -public: - struct Configuration : public StatelessTableEngineConfiguration - { - Configuration() = default; - - const String & getPath() const { return url.key; } - - bool update(const ContextPtr & context); - - void connect(const ContextPtr & context); - - bool withGlobs() const { return url.key.find_first_of("*?{") != std::string::npos; } - - bool withPartitionWildcard() const - { - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return url.bucket.find(PARTITION_ID_WILDCARD) != String::npos || keys.back().find(PARTITION_ID_WILDCARD) != String::npos; - } - - bool withGlobsIgnorePartitionWildcard() const; - - S3::URI url; - S3::AuthSettings auth_settings; - S3Settings::RequestSettings request_settings; - /// If s3 configuration was passed from ast, then it is static. - /// If from config - it can be changed with config reload. - bool static_configuration = true; - /// Headers from ast is a part of static configuration. - HTTPHeaderEntries headers_from_ast; - - std::shared_ptr client; - std::vector keys; - }; - - StorageS3( - const Configuration & configuration_, - const ContextPtr & context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_ = false, - ASTPtr partition_by_ = nullptr); - - String getName() const override { return name; } - - void read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - size_t num_streams) override; - - SinkToStoragePtr - write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; - - void truncate( - const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; - - bool supportsPartitionBy() const override; - - static void processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection); - - static SchemaCache & getSchemaCache(const ContextPtr & ctx); - - static Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file = true); - - static ColumnsDescription getTableStructureFromData( - const Configuration & configuration_, const std::optional & format_settings_, const ContextPtr & ctx); - - static std::pair getTableStructureAndFormatFromData( - const Configuration & configuration, const std::optional & format_settings, const ContextPtr & ctx); - - using KeysWithInfo = StorageS3Source::KeysWithInfo; - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - -protected: - virtual Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context); - - virtual void updateConfiguration(const ContextPtr & local_context); - - void useConfiguration(const Configuration & new_configuration); - - Configuration getConfigurationCopy() const; - - String getFormatCopy() const; - -private: - friend class StorageS3Cluster; - friend class TableFunctionS3Cluster; - friend class StorageS3Queue; - friend class ReadFromStorageS3Step; - - Configuration configuration; - mutable std::mutex configuration_update_mutex; - - String name; - const bool distributed_processing; - std::optional format_settings; - 
ASTPtr partition_by; - - static std::pair getTableStructureAndFormatFromDataImpl( - std::optional format, - const Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx); - - bool supportsSubcolumns() const override { return true; } - - bool supportsSubsetOfColumns(const ContextPtr & context) const; - - bool prefersLargeBlocks() const override; - - bool parallelizeOutputAfterReading(ContextPtr context) const override; -}; - -std::unique_ptr createS3ReadBuffer( - const String & key, - size_t object_size, - std::shared_ptr context, - std::shared_ptr client_ptr, - const String & bucket, - const String & version_id, - const S3Settings::RequestSettings & request_settings); - -std::unique_ptr createAsyncS3ReadBuffer( - const String & key, - const ReadSettings & read_settings, - size_t object_size, - std::shared_ptr context, - std::shared_ptr client_ptr, - const String & bucket, - const String & version_id, - const S3Settings::RequestSettings & request_settings); -} - -#endif diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp deleted file mode 100644 index 0060450eea7..00000000000 --- a/src/Storages/StorageS3Cluster.cpp +++ /dev/null @@ -1,114 +0,0 @@ -#include "Storages/StorageS3Cluster.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -StorageS3Cluster::StorageS3Cluster( - const String & cluster_name_, - const StorageS3::Configuration & configuration_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const ContextPtr & context) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageS3Cluster (" + table_id_.table_name + ")")) - , s3_configuration{configuration_} -{ - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri); - context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration_.headers_from_ast); - - StorageInMemoryMetadata storage_metadata; - updateConfigurationIfChanged(context); - - if (columns_.empty()) - { - ColumnsDescription columns; - /// `format_settings` is set to std::nullopt, because StorageS3Cluster is used only as table function - if (s3_configuration.format == "auto") - std::tie(columns, s3_configuration.format) = StorageS3::getTableStructureAndFormatFromData(s3_configuration, /*format_settings=*/std::nullopt, context); - else - columns = StorageS3::getTableStructureFromData(s3_configuration, /*format_settings=*/std::nullopt, context); - - storage_metadata.setColumns(columns); - } - else - { - if (s3_configuration.format == "auto") - s3_configuration.format = StorageS3::getTableStructureAndFormatFromData(s3_configuration, /*format_settings=*/std::nullopt, context).second; - - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -void StorageS3Cluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) -{ - ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); - if (!expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query 
from table function s3Cluster, got '{}'", queryToString(query)); - - TableFunctionS3Cluster::updateStructureAndFormatArgumentsIfNeeded( - expression_list->children, - storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), - s3_configuration.format, - context); -} - -void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) -{ - s3_configuration.update(local_context); -} - -RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const -{ - auto iterator = std::make_shared( - *s3_configuration.client, - s3_configuration.url, - predicate, - getVirtualsList(), - context, - nullptr, - s3_configuration.request_settings, - context->getFileProgressCallback()); - - auto callback = std::make_shared>([iterator]() mutable -> String - { - if (auto next = iterator->next()) - return next->key; - return ""; - }); - return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; -} - -} - -#endif diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h deleted file mode 100644 index 802fd3f9139..00000000000 --- a/src/Storages/StorageS3Cluster.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include - -namespace DB -{ - -class Context; - -class StorageS3Cluster : public IStorageCluster -{ -public: - StorageS3Cluster( - const String & cluster_name_, - const StorageS3::Configuration & configuration_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const ContextPtr & context_); - - std::string getName() const override { return "S3Cluster"; } - - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - - bool supportsSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - -protected: - void updateConfigurationIfChanged(ContextPtr local_context); - -private: - void updateBeforeRead(const ContextPtr & context) override { updateConfigurationIfChanged(context); } - - void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; - - StorageS3::Configuration s3_configuration; -}; - - -} - -#endif diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 04634bcf1b3..b767805f637 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -18,18 +18,20 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; } -S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const Settings & settings) +S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const Settings & settings, bool validate_settings) { - updateFromSettingsImpl(settings, false); - validate(); + updateFromSettings(settings, false); + if (validate_settings) + validate(); } S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix) - : PartUploadSettings(settings) + String setting_name_prefix, + bool validate_settings) + : PartUploadSettings(settings, validate_settings) { String key = config_prefix + "." 
+ setting_name_prefix; strict_upload_part_size = config.getUInt64(key + "strict_upload_part_size", strict_upload_part_size); @@ -46,7 +48,8 @@ S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings( storage_class_name = config.getString(config_prefix + ".s3_storage_class", storage_class_name); storage_class_name = Poco::toUpperInPlace(storage_class_name); - validate(); + if (validate_settings) + validate(); } S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const NamedCollection & collection) @@ -65,7 +68,7 @@ S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const NamedC validate(); } -void S3Settings::RequestSettings::PartUploadSettings::updateFromSettingsImpl(const Settings & settings, bool if_changed) +void S3Settings::RequestSettings::PartUploadSettings::updateFromSettings(const Settings & settings, bool if_changed) { if (!if_changed || settings.s3_strict_upload_part_size.changed) strict_upload_part_size = settings.s3_strict_upload_part_size; @@ -108,7 +111,7 @@ void S3Settings::RequestSettings::PartUploadSettings::validate() if (max_upload_part_size > max_upload_part_size_limit) throw Exception( ErrorCodes::INVALID_SETTING_VALUE, - "Setting max_upload_part_size has invalid value {} which is grater than the s3 API limit {}", + "Setting max_upload_part_size has invalid value {} which is greater than the s3 API limit {}", ReadableSize(max_upload_part_size), ReadableSize(max_upload_part_size_limit)); if (max_single_part_upload_size > max_upload_part_size_limit) @@ -170,8 +173,8 @@ void S3Settings::RequestSettings::PartUploadSettings::validate() } -S3Settings::RequestSettings::RequestSettings(const Settings & settings) - : upload_settings(settings) +S3Settings::RequestSettings::RequestSettings(const Settings & settings, bool validate_settings) + : upload_settings(settings, validate_settings) { updateFromSettingsImpl(settings, false); } @@ -190,8 +193,9 @@ S3Settings::RequestSettings::RequestSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix) - : upload_settings(config, config_prefix, settings, setting_name_prefix) + String setting_name_prefix, + bool validate_settings) + : upload_settings(config, config_prefix, settings, setting_name_prefix, validate_settings) { String key = config_prefix + "." 
+ setting_name_prefix; max_single_read_retries = config.getUInt64(key + "max_single_read_retries", settings.s3_max_single_read_retries); @@ -262,13 +266,12 @@ void S3Settings::RequestSettings::updateFromSettingsImpl(const Settings & settin request_timeout_ms = settings.s3_request_timeout_ms; } -void S3Settings::RequestSettings::updateFromSettings(const Settings & settings) +void S3Settings::RequestSettings::updateFromSettingsIfChanged(const Settings & settings) { updateFromSettingsImpl(settings, true); - upload_settings.updateFromSettings(settings); + upload_settings.updateFromSettings(settings, true); } - void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings) { std::lock_guard lock(mutex); @@ -292,7 +295,7 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U } } -S3Settings StorageS3Settings::getSettings(const String & endpoint, const String & user, bool ignore_user) const +std::optional StorageS3Settings::getSettings(const String & endpoint, const String & user, bool ignore_user) const { std::lock_guard lock(mutex); auto next_prefix_setting = s3_settings.upper_bound(endpoint); diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 0f972db02b1..c3bc8aa6ed6 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -39,20 +39,19 @@ struct S3Settings size_t max_single_operation_copy_size = 5ULL * 1024 * 1024 * 1024; String storage_class_name; - void updateFromSettings(const Settings & settings) { updateFromSettingsImpl(settings, true); } + void updateFromSettings(const Settings & settings, bool if_changed); void validate(); private: PartUploadSettings() = default; - explicit PartUploadSettings(const Settings & settings); + explicit PartUploadSettings(const Settings & settings, bool validate_settings = true); explicit PartUploadSettings(const NamedCollection & collection); PartUploadSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix = {}); - - void updateFromSettingsImpl(const Settings & settings, bool if_changed); + String setting_name_prefix = {}, + bool validate_settings = true); friend struct RequestSettings; }; @@ -80,7 +79,7 @@ struct S3Settings void setStorageClassName(const String & storage_class_name) { upload_settings.storage_class_name = storage_class_name; } RequestSettings() = default; - explicit RequestSettings(const Settings & settings); + explicit RequestSettings(const Settings & settings, bool validate_settings = true); explicit RequestSettings(const NamedCollection & collection); /// What's the setting_name_prefix, and why do we need it? 
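/* Illustrative sketch (not part of the change above; prefix values are hypothetical): how the
   `setting_name_prefix` asked about in the comment above composes the config keys read by
   PartUploadSettings/RequestSettings, and what `validate_settings = false` defers. */
#include <iostream>
#include <string>

/// Mirrors `String key = config_prefix + "." + setting_name_prefix;` followed by `key + "<setting>"`.
static std::string composeKey(const std::string & config_prefix, const std::string & setting_name_prefix, const std::string & setting)
{
    return config_prefix + "." + setting_name_prefix + setting;
}

int main()
{
    /// Without a prefix the key sits directly under the endpoint element.
    std::cout << composeKey("s3", "", "strict_upload_part_size") << '\n';          /// s3.strict_upload_part_size
    /// With a (hypothetical) "s3_" prefix the same logical setting is read from a prefixed key.
    std::cout << composeKey("backups", "s3_", "strict_upload_part_size") << '\n';  /// backups.s3_strict_upload_part_size
    /// When validate_settings is false, the constructors skip validate(); limit checks such as
    /// max_upload_part_size vs. the S3 API cap then run only when validate() is called explicitly.
    return 0;
}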
@@ -94,9 +93,10 @@ struct S3Settings const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix = {}); + String setting_name_prefix = {}, + bool validate_settings = true); - void updateFromSettings(const Settings & settings); + void updateFromSettingsIfChanged(const Settings & settings); private: void updateFromSettingsImpl(const Settings & settings, bool if_changed); @@ -112,7 +112,7 @@ class StorageS3Settings public: void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings); - S3Settings getSettings(const String & endpoint, const String & user, bool ignore_user = false) const; + std::optional getSettings(const String & endpoint, const String & user, bool ignore_user = false) const; private: mutable std::mutex mutex; diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 8b087a4a2bc..aada25168f8 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -115,7 +115,7 @@ std::optional StorageSnapshot::tryGetColumn(const GetColumnsOpt { const auto & columns = getMetadataForQuery()->getColumns(); auto column = columns.tryGetColumn(options, column_name); - if (column && (!column->type->hasDynamicSubcolumns() || !options.with_extended_objects)) + if (column && (!column->type->hasDynamicSubcolumnsDeprecated() || !options.with_extended_objects)) return column; if (options.with_extended_objects) diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 5aca3df1513..f550ccb2bc4 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -295,6 +295,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + static FormatSettings getFormatSettingsFromArgs(const StorageFactory::Arguments & args); struct Configuration : public StatelessTableEngineConfiguration diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index c80cdec74a2..a6334e7430d 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -35,6 +35,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } private: diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 909599c00af..b42b070d518 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -48,6 +48,7 @@ const char * auto_contributors[] { "Alex Cao", "Alex Cheng", "Alex Karo", + "Alex Katsman", "Alex Krash", "Alex Ryndin", "Alex Zatelepin", @@ -101,6 +102,7 @@ const char * auto_contributors[] { "Alexey Korepanov", "Alexey Milovidov", "Alexey Perevyshin", + "Alexey Petrunyaka", "Alexey Tronov", "Alexey Vasiliev", "Alexey Zatelepin", @@ -109,6 +111,7 @@ const char * auto_contributors[] { "AlfVII", "Alfonso Martinez", "Alfred Xu", + "Ali", "Ali Demirci", "Aliaksandr Pliutau", "Aliaksandr Shylau", @@ -250,6 +253,7 @@ const char * auto_contributors[] { "Brian Hunter", "Brokenice0415", "Bulat Gaifullin", + "Caio Ricciuti", "Camden Cheek", "Camilo Sierra", "Carbyn", @@ -384,6 +388,7 @@ const char * auto_contributors[] { "Evgenii Pravda", "Evgeniia Sudarikova", 
"Evgeniy Gatov", + "Evgeniy Leko", "Evgeniy Udodov", "Evgeny", "Evgeny Konkov", @@ -413,6 +418,7 @@ const char * auto_contributors[] { "Fille", "Flowyi", "Francisco Barón", + "Francisco Javier Jurado Moreno", "Frank Chen", "Frank Zhao", "François Violette", @@ -425,6 +431,7 @@ const char * auto_contributors[] { "G5.Qin", "Gabriel", "Gabriel Archer", + "Gabriel Martinez", "Gagan Arneja", "Gagan Goel", "Gao Qiang", @@ -446,6 +453,7 @@ const char * auto_contributors[] { "Grigory Buteyko", "Grigory Pervakov", "GruffGemini", + "Grégoire Pineau", "Guillaume Tassery", "Guo Wangyang", "Guo Wei (William)", @@ -587,6 +595,7 @@ const char * auto_contributors[] { "Keiji Yoshida", "Ken Chen", "Ken MacInnis", + "KenL", "Kenji Noguchi", "Kerry Clendinning", "Kevin Chiang", @@ -640,6 +649,7 @@ const char * auto_contributors[] { "Leonardo Maciel", "Leonid Krylov", "Leopold Schabel", + "Leticia Webb", "Lev Borodin", "Lewinma", "Li Shuai", @@ -701,6 +711,7 @@ const char * auto_contributors[] { "Masha", "Mathieu Rey", "Matthew Peveler", + "Mattias Naarttijärvi", "Matwey V. Kornilov", "Max", "Max Akhmedov", @@ -711,6 +722,7 @@ const char * auto_contributors[] { "MaxTheHuman", "MaxWk", "Maxim Akhmedov", + "Maxim Alexeev", "Maxim Babenko", "Maxim Fedotov", "Maxim Fridental", @@ -739,6 +751,7 @@ const char * auto_contributors[] { "Michael Razuvaev", "Michael Schnerring", "Michael Smitasin", + "Michael Stetsyuk", "Michail Safronov", "Michal Lisowski", "MicrochipQ", @@ -879,6 +892,7 @@ const char * auto_contributors[] { "Pavlo Bashynskiy", "Pawel Rog", "Paweł Kudzia", + "Pazitiff9", "Peignon Melvyn", "Peng Jian", "Peng Liu", @@ -1084,6 +1098,7 @@ const char * auto_contributors[] { "Tom Bombadil", "Tom Risse", "Tomas Barton", + "Tomer Shafir", "Tomáš Hromada", "Tristan", "Tsarkova Anastasia", @@ -1123,6 +1138,7 @@ const char * auto_contributors[] { "Victor Krasnov", "Victor Tarnavsky", "Viktor Taranenko", + "Vinay Suryadevara", "Vincent", "Vincent Bernat", "Vitalii S", @@ -1162,6 +1178,9 @@ const char * auto_contributors[] { "Vladislav Smirnov", "Vladislav V", "Vojtech Splichal", + "Volodya", + "Volodya Giro", + "Volodyachan", "Volodymyr Kuznetsov", "Vsevolod Orlov", "Vxider", @@ -1179,6 +1198,7 @@ const char * auto_contributors[] { "XenoAmess", "Xianda Ke", "Xiang Zhou", + "Xiaofei Hu", "Xin Wang", "Xoel Lopez Barata", "Xudong Zhang", @@ -1224,6 +1244,7 @@ const char * auto_contributors[] { "Zhipeng", "Zhuo Qiu", "Zijie Lu", + "Zimu Li", "Ziy1-Tan", "Zoran Pandovski", "[데이터플랫폼팀] 이호선", @@ -1490,6 +1511,7 @@ const char * auto_contributors[] { "jiyoungyoooo", "jktng", "jkuklis", + "joe09@foxmail.com", "joelynch", "johanngan", "johnnymatthews", @@ -1658,6 +1680,7 @@ const char * auto_contributors[] { "ongkong", "orantius", "p0ny", + "p1rattttt", "palasonicq", "palegre-tiny", "pawelsz-rb", @@ -1667,6 +1690,7 @@ const char * auto_contributors[] { "pedro.riera", "pengxiangcai", "peshkurov", + "pet74alex", "peter279k", "philip.han", "pingyu", @@ -1680,6 +1704,7 @@ const char * auto_contributors[] { "pyos", "pzhdfy", "qaziqarta", + "qiangxuhui", "qianlixiang", "qianmoQ", "qieqieplus", @@ -1793,6 +1818,7 @@ const char * auto_contributors[] { "unknown", "urgordeadbeef", "usurai", + "v01dxyz", "vahid-sohrabloo", "vdimir", "velavokr", @@ -1802,6 +1828,7 @@ const char * auto_contributors[] { "vic", "vicdashkov", "vicgao", + "vinay92-ch", "vinity", "vitac", "vitstn", @@ -1818,6 +1845,7 @@ const char * auto_contributors[] { "weeds085490", "whysage", "wineternity", + "woodlzm", "wuxiaobai24", "wxybear", "wzl", @@ -1877,6 +1905,7 @@ 
const char * auto_contributors[] { "zhenjial", "zhifeng", "zhongyuankai", + "zhou", "zhoubintao", "zhukai", "zimv", @@ -1891,6 +1920,7 @@ const char * auto_contributors[] { "zxealous", "zy-kkk", "zzsmdfj", + "zzyReal666", "Šimon Podlipský", "Александр", "Александр Нам", diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp index 634089bd1cd..b67a8b23e9d 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp @@ -1,9 +1,7 @@ #include #include -#include #include -#include -#include +#include #include #include #include @@ -11,6 +9,9 @@ #include #include #include +#include +#include +#include namespace DB { @@ -76,14 +77,14 @@ void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, C { fillDataImpl(res_columns, StorageFile::getSchemaCache(context), "File"); #if USE_AWS_S3 - fillDataImpl(res_columns, StorageS3::getSchemaCache(context), "S3"); + fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageS3Configuration::type_name), "S3"); #endif #if USE_HDFS - fillDataImpl(res_columns, StorageHDFS::getSchemaCache(context), "HDFS"); + fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageHDFSConfiguration::type_name), "HDFS"); #endif fillDataImpl(res_columns, StorageURL::getSchemaCache(context), "URL"); #if USE_AZURE_BLOB_STORAGE - fillDataImpl(res_columns, StorageAzureBlob::getSchemaCache(context), "Azure"); + fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageAzureConfiguration::type_name), "Azure"); #endif } diff --git a/src/Storages/UVLoop.h b/src/Storages/UVLoop.h index dd1d64973d1..907a3fc0b13 100644 --- a/src/Storages/UVLoop.h +++ b/src/Storages/UVLoop.h @@ -57,9 +57,9 @@ public: } } - inline uv_loop_t * getLoop() { return loop_ptr.get(); } + uv_loop_t * getLoop() { return loop_ptr.get(); } - inline const uv_loop_t * getLoop() const { return loop_ptr.get(); } + const uv_loop_t * getLoop() const { return loop_ptr.get(); } private: std::unique_ptr loop_ptr; diff --git a/src/Storages/Utils.cpp b/src/Storages/Utils.cpp new file mode 100644 index 00000000000..ff73888e19d --- /dev/null +++ b/src/Storages/Utils.cpp @@ -0,0 +1,30 @@ +#include +#include + + +namespace CurrentMetrics +{ + extern const Metric AttachedTable; + extern const Metric AttachedView; + extern const Metric AttachedDictionary; +} + + +namespace DB +{ + CurrentMetrics::Metric getAttachedCounterForStorage(const StoragePtr & storage) + { + if (storage->isView()) + { + return CurrentMetrics::AttachedView; + } + else if (storage->isDictionary()) + { + return CurrentMetrics::AttachedDictionary; + } + else + { + return CurrentMetrics::AttachedTable; + } + } +} diff --git a/src/Storages/Utils.h b/src/Storages/Utils.h new file mode 100644 index 00000000000..c86c2a4c341 --- /dev/null +++ b/src/Storages/Utils.h @@ -0,0 +1,10 @@ +#pragma once + +#include +#include + + +namespace DB +{ + CurrentMetrics::Metric getAttachedCounterForStorage(const StoragePtr & storage); +} diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 26e953c0578..6ea7bdc312d 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -210,7 +210,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( auto type_name = type_col[i].get(); auto storage_column = storage_columns.tryGetPhysical(name); - if 
(storage_column && storage_column->type->hasDynamicSubcolumns()) + if (storage_column && storage_column->type->hasDynamicSubcolumnsDeprecated()) res.add(ColumnDescription(std::move(name), DataTypeFactory::instance().get(type_name))); } } diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index ce07c8e8d3e..0fb00c08acc 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -43,8 +43,6 @@ void registerStorageIceberg(StorageFactory & factory); #endif #if USE_HDFS -void registerStorageHDFS(StorageFactory & factory); - #if USE_HIVE void registerStorageHive(StorageFactory & factory); #endif @@ -97,9 +95,7 @@ void registerStorageSQLite(StorageFactory & factory); void registerStorageKeeperMap(StorageFactory & factory); -#if USE_AZURE_BLOB_STORAGE -void registerStorageAzureBlob(StorageFactory & factory); -#endif +void registerStorageObjectStorage(StorageFactory & factory); void registerStorages() { @@ -129,7 +125,6 @@ void registerStorages() #endif #if USE_AWS_S3 - registerStorageS3(factory); registerStorageHudi(factory); registerStorageS3Queue(factory); @@ -144,12 +139,9 @@ void registerStorages() #endif #if USE_HDFS - registerStorageHDFS(factory); - #if USE_HIVE registerStorageHive(factory); #endif - #endif registerStorageODBC(factory); @@ -197,9 +189,7 @@ void registerStorages() registerStorageKeeperMap(factory); - #if USE_AZURE_BLOB_STORAGE - registerStorageAzureBlob(factory); - #endif + registerStorageObjectStorage(factory); } } diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index 1946d8e8905..ed7f80e5df9 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -39,7 +39,7 @@ class Context; class ITableFunction : public std::enable_shared_from_this { public: - static inline std::string getDatabaseName() { return "_table_function"; } + static std::string getDatabaseName() { return "_table_function"; } /// Get the main function name. 
virtual std::string getName() const = 0; diff --git a/src/TableFunctions/ITableFunctionCluster.h b/src/TableFunctions/ITableFunctionCluster.h index 9f56d781bc9..28dc43f350b 100644 --- a/src/TableFunctions/ITableFunctionCluster.h +++ b/src/TableFunctions/ITableFunctionCluster.h @@ -1,13 +1,10 @@ #pragma once -#include "config.h" - #include #include #include #include -#include -#include +#include namespace DB diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 91165ba6705..fe6e5b3e593 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -1,15 +1,16 @@ #pragma once #include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include -#if USE_AWS_S3 - -# include -# include -# include -# include -# include -# include namespace DB { @@ -23,44 +24,76 @@ public: protected: StoragePtr executeImpl( - const ASTPtr & /*ast_function*/, + const ASTPtr & /* ast_function */, ContextPtr context, const std::string & table_name, - ColumnsDescription /*cached_columns*/, + ColumnsDescription cached_columns, bool /*is_insert_query*/) const override { ColumnsDescription columns; - if (TableFunction::configuration.structure != "auto") - columns = parseColumnsListFromString(TableFunction::configuration.structure, context); + auto configuration = TableFunction::getConfiguration(); + if (configuration->structure != "auto") + columns = parseColumnsListFromString(configuration->structure, context); + else if (!cached_columns.empty()) + columns = cached_columns; StoragePtr storage = Storage::create( - TableFunction::configuration, context, LoadingStrictnessLevel::CREATE, StorageID(TableFunction::getDatabaseName(), table_name), - columns, ConstraintsDescription{}, String{}, std::nullopt); + configuration, context, StorageID(TableFunction::getDatabaseName(), table_name), + columns, ConstraintsDescription{}, String{}, std::nullopt, LoadingStrictnessLevel::CREATE); storage->startup(); return storage; } - const char * getStorageTypeName() const override { return Storage::name; } + const char * getStorageTypeName() const override { return name; } - ColumnsDescription getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const override + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override { - if (TableFunction::configuration.structure == "auto") + auto configuration = TableFunction::getConfiguration(); + if (configuration->structure == "auto") { context->checkAccess(TableFunction::getSourceAccessType()); - return Storage::getTableStructureFromData(TableFunction::configuration, std::nullopt, context); + auto object_storage = TableFunction::getObjectStorage(context, !is_insert_query); + return Storage::getTableStructureFromData(object_storage, configuration, std::nullopt, context); + } + else + { + return parseColumnsListFromString(configuration->structure, context); } - - return parseColumnsListFromString(TableFunction::configuration.structure, context); } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override { + auto configuration = TableFunction::getConfiguration(); + configuration->format = "Parquet"; /// Set default format to Parquet if it's not specified in arguments. 
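/* Illustrative sketch (simplified; the helper below is hypothetical, not ClickHouse API): the
   column-resolution order used by the new executeImpl()/getActualTableStructure() above — an
   explicit `structure` argument wins, then columns cached by the caller, otherwise the schema
   is inferred from the data. */
#include <string>
#include <vector>

using Columns = std::vector<std::string>;  /// stand-in for ColumnsDescription

static Columns resolveColumns(const std::string & structure, const Columns & cached_columns, const Columns & inferred)
{
    if (structure != "auto")
        return {structure};      /// parseColumnsListFromString(structure, context) in the code above
    if (!cached_columns.empty())
        return cached_columns;   /// cached_columns argument of executeImpl()
    return inferred;             /// getTableStructureFromData(...) when nothing else is available
}

int main()
{
    Columns resolved = resolveColumns("auto", {}, {"a Int32", "b String"});
    return resolved.size() == 2 ? 0 : 1;  /// falls back to the inferred schema
}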
- TableFunction::configuration.format = "Parquet"; TableFunction::parseArguments(ast_function, context); } }; -} +struct TableFunctionIcebergName +{ + static constexpr auto name = "iceberg"; +}; + +struct TableFunctionDeltaLakeName +{ + static constexpr auto name = "deltaLake"; +}; + +struct TableFunctionHudiName +{ + static constexpr auto name = "hudi"; +}; + +#if USE_AWS_S3 +#if USE_AVRO +using TableFunctionIceberg = ITableFunctionDataLake; #endif +#if USE_PARQUET +using TableFunctionDeltaLake = ITableFunctionDataLake; +#endif +using TableFunctionHudi = ITableFunctionDataLake; +#endif + +} diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp deleted file mode 100644 index 7a17db2a1a8..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ /dev/null @@ -1,395 +0,0 @@ -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "registerTableFunctions.h" -#include -#include -#include - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; -} - -namespace -{ - -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); -} - -} - -void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) -{ - /// Supported signatures: - /// - /// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) - /// - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - StorageAzureBlob::processNamedCollectionResult(configuration, *named_collection); - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); - } - else - { - if (engine_args.size() < 3 || engine_args.size() > 8) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage Azure requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - std::unordered_map engine_args_to_idx; - - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - - auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; - - if (engine_args.size() == 4) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name/structure"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - } - else - { - configuration.structure = fourth_arg; - } - } - else if (engine_args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if 
(is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - } - } - else if (engine_args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - configuration.structure = checkAndGetLiteralArgument(engine_args[5], "structure"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name/structure"); - if (is_format_arg(sixth_arg)) - configuration.format = sixth_arg; - else - configuration.structure = sixth_arg; - } - } - else if (engine_args.size() == 7) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - } - else if (engine_args.size() == 8) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - configuration.structure = checkAndGetLiteralArgument(engine_args[7], "structure"); - } - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); - } -} - -void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) -{ - /// Clone ast function, because we can modify its arguments like removing headers. - auto ast_copy = ast_function->clone(); - - ASTs & args_func = ast_function->children; - - if (args_func.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); - - auto & args = args_func.at(0)->children; - - parseArgumentsImpl(args, context); -} - -void TableFunctionAzureBlobStorage::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context) -{ - if (auto collection = tryGetNamedCollectionWithOverrides(args, context)) - { - /// In case of named collection, just add key-value pairs "format='...', structure='...'" - /// at the end of arguments to override existed format and structure with "auto" values. 
- if (collection->getOrDefault("format", "auto") == "auto") - { - ASTs format_equal_func_args = {std::make_shared("format"), std::make_shared(format)}; - auto format_equal_func = makeASTFunction("equals", std::move(format_equal_func_args)); - args.push_back(format_equal_func); - } - if (collection->getOrDefault("structure", "auto") == "auto") - { - ASTs structure_equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; - auto structure_equal_func = makeASTFunction("equals", std::move(structure_equal_func_args)); - args.push_back(structure_equal_func); - } - } - else - { - if (args.size() < 3 || args.size() > 8) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage Azure requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); - - auto format_literal = std::make_shared(format); - auto structure_literal = std::make_shared(structure); - - for (auto & arg : args) - arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); - - auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; - - /// (connection_string, container_name, blobpath) - if (args.size() == 3) - { - args.push_back(format_literal); - /// Add compression = "auto" before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - /// (connection_string, container_name, blobpath, structure) or - /// (connection_string, container_name, blobpath, format) - /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. - else if (args.size() == 4) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); - /// (..., format) -> (..., format, compression, structure) - if (is_format_arg(fourth_arg)) - { - if (fourth_arg == "auto") - args[3] = format_literal; - /// Add compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - /// (..., structure) -> (..., format, compression, structure) - else - { - auto structure_arg = args.back(); - args[3] = format_literal; - /// Add compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - if (fourth_arg == "auto") - args.push_back(structure_literal); - else - args.push_back(structure_arg); - } - } - /// (connection_string, container_name, blobpath, format, compression) or - /// (storage_account_url, container_name, blobpath, account_name, account_key) - /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. - else if (args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - /// (..., format, compression) -> (..., format, compression, structure) - if (is_format_arg(fourth_arg)) - { - if (fourth_arg == "auto") - args[3] = format_literal; - args.push_back(structure_literal); - } - /// (..., account_name, account_key) -> (..., account_name, account_key, format, compression, structure) - else - { - args.push_back(format_literal); - /// Add compression=auto before structure argument. 
- args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - } - /// (connection_string, container_name, blobpath, format, compression, structure) or - /// (storage_account_url, container_name, blobpath, account_name, account_key, structure) or - /// (storage_account_url, container_name, blobpath, account_name, account_key, format) - else if (args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - auto sixth_arg = checkAndGetLiteralArgument(args[5], "format/structure"); - - /// (..., format, compression, structure) - if (is_format_arg(fourth_arg)) - { - if (fourth_arg == "auto") - args[3] = format_literal; - if (checkAndGetLiteralArgument(args[5], "structure") == "auto") - args[5] = structure_literal; - } - /// (..., account_name, account_key, format) -> (..., account_name, account_key, format, compression, structure) - else if (is_format_arg(sixth_arg)) - { - if (sixth_arg == "auto") - args[5] = format_literal; - /// Add compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - /// (..., account_name, account_key, structure) -> (..., account_name, account_key, format, compression, structure) - else - { - auto structure_arg = args.back(); - args[5] = format_literal; - /// Add compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - if (sixth_arg == "auto") - args.push_back(structure_literal); - else - args.push_back(structure_arg); - } - } - /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression) - else if (args.size() == 7) - { - /// (..., format, compression) -> (..., format, compression, structure) - if (checkAndGetLiteralArgument(args[5], "format") == "auto") - args[5] = format_literal; - args.push_back(structure_literal); - } - /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) - else if (args.size() == 8) - { - if (checkAndGetLiteralArgument(args[5], "format") == "auto") - args[5] = format_literal; - if (checkAndGetLiteralArgument(args[7], "structure") == "auto") - args[7] = structure_literal; - } - } -} - -ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(ContextPtr context, bool is_insert_query) const -{ - if (configuration.structure == "auto") - { - context->checkAccess(getSourceAccessType()); - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - - auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container, configuration.getConnectionURL().toString()); - if (configuration.format == "auto") - return StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, std::nullopt, context).first; - return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context); - } - - return parseColumnsListFromString(configuration.structure, context); -} - -bool TableFunctionAzureBlobStorage::supportsReadingSubsetOfColumns(const ContextPtr & context) -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context); -} - -std::unordered_set TableFunctionAzureBlobStorage::getVirtualsToCheckBeforeUsingStructureHint() const -{ - return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); -} - -StoragePtr 
TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const -{ - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - - ColumnsDescription columns; - if (configuration.structure != "auto") - columns = parseColumnsListFromString(configuration.structure, context); - else if (!structure_hint.empty()) - columns = structure_hint; - - StoragePtr storage = std::make_shared( - configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURL().toString()), - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - String{}, - /// No format_settings for table function Azure - std::nullopt, - /* distributed_processing */ false, - nullptr); - - storage->startup(); - - return storage; -} - -void registerTableFunctionAzureBlobStorage(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on Azure Blob Storage.)", - .examples{{"azureBlobStorage", "SELECT * FROM azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, - .allow_readonly = false}); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.h b/src/TableFunctions/TableFunctionAzureBlobStorage.h deleted file mode 100644 index 9622881b417..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include - - -namespace DB -{ - -class Context; - -/* AzureBlob(source, [access_key_id, secret_access_key,] [format, compression, structure]) - creates a temporary storage for a file in AzureBlob. 
- */ -class TableFunctionAzureBlobStorage : public ITableFunction -{ -public: - static constexpr auto name = "azureBlobStorage"; - - static constexpr auto signature = " - connection_string, container_name, blobpath\n" - " - connection_string, container_name, blobpath, structure \n" - " - connection_string, container_name, blobpath, format \n" - " - connection_string, container_name, blobpath, format, compression \n" - " - connection_string, container_name, blobpath, format, compression, structure \n" - " - storage_account_url, container_name, blobpath, account_name, account_key\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"; - - static size_t getMaxNumberOfArguments() { return 8; } - - String getName() const override - { - return name; - } - - virtual String getSignature() const - { - return signature; - } - - bool hasStaticStructure() const override { return configuration.structure != "auto"; } - - bool needStructureHint() const override { return configuration.structure == "auto"; } - - void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - - bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; - - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - - virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - - static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context); - -protected: - - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "Azure"; } - - ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - - mutable StorageAzureBlob::Configuration configuration; - ColumnsDescription structure_hint; -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp deleted file mode 100644 index 02b24dccf86..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ /dev/null @@ -1,83 +0,0 @@ -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include - -#include "registerTableFunctions.h" - -#include - - -namespace DB -{ - -StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( - const ASTPtr & /*function*/, ContextPtr context, - const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const -{ - StoragePtr storage; - ColumnsDescription columns; - - if (configuration.structure != "auto") - { - columns = parseColumnsListFromString(configuration.structure, context); - } - else if (!structure_hint.empty()) - { - columns = structure_hint; - } - - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - - if (context->getClientInfo().query_kind == 
ClientInfo::QueryKind::SECONDARY_QUERY) - { - /// On worker node this filename won't contains globs - storage = std::make_shared( - configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURL().toString()), - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - /* comment */String{}, - /* format_settings */std::nullopt, /// No format_settings - /* distributed_processing */ true, - /*partition_by_=*/nullptr); - } - else - { - storage = std::make_shared( - cluster_name, - configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURL().toString()), - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - context); - } - - storage->startup(); - - return storage; -} - - -void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", - .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, - .allow_readonly = false} - ); -} - - -} - -#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h deleted file mode 100644 index 58f79328f63..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include - - -namespace DB -{ - -class Context; - -/** - * azureBlobStorageCluster(cluster_name, source, [access_key_id, secret_access_key,] format, compression_method, structure) - * A table function, which allows to process many files from Azure Blob Storage on a specific cluster - * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks - * in Azure Blob Storage file path and dispatch each file dynamically. - * On worker node it asks initiator about next task to process, processes it. - * This is repeated until the tasks are finished. 
- */ -class TableFunctionAzureBlobStorageCluster : public ITableFunctionCluster -{ -public: - static constexpr auto name = "azureBlobStorageCluster"; - static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - -protected: - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "AzureBlobStorageCluster"; } -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionDeltaLake.cpp b/src/TableFunctions/TableFunctionDeltaLake.cpp deleted file mode 100644 index b8bf810f6fa..00000000000 --- a/src/TableFunctions/TableFunctionDeltaLake.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 && USE_PARQUET - -#include -#include -#include -#include -#include "registerTableFunctions.h" - -namespace DB -{ - -struct TableFunctionDeltaLakeName -{ - static constexpr auto name = "deltaLake"; -}; - -using TableFunctionDeltaLake = ITableFunctionDataLake; - -void registerTableFunctionDeltaLake(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation = { - .description=R"(The table function can be used to read the DeltaLake table stored on object store.)", - .examples{{"deltaLake", "SELECT * FROM deltaLake(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHDFS.cpp b/src/TableFunctions/TableFunctionHDFS.cpp deleted file mode 100644 index 45829245551..00000000000 --- a/src/TableFunctions/TableFunctionHDFS.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include "config.h" -#include "registerTableFunctions.h" - -#if USE_HDFS -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -StoragePtr TableFunctionHDFS::getStorage( - const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, - const std::string & table_name, const String & compression_method_) const -{ - return std::make_shared( - source, - StorageID(getDatabaseName(), table_name), - format_, - columns, - ConstraintsDescription{}, - String{}, - global_context, - compression_method_); -} - -ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const -{ - if (structure == "auto") - { - context->checkAccess(getSourceAccessType()); - if (format == "auto") - return StorageHDFS::getTableStructureAndFormatFromData(filename, compression_method, context).first; - return StorageHDFS::getTableStructureFromData(format, filename, compression_method, context); - } - - return parseColumnsListFromString(structure, context); -} - -void registerTableFunctionHDFS(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} -#endif diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h deleted file mode 100644 index f1c0b8a7eae..00000000000 --- a/src/TableFunctions/TableFunctionHDFS.h +++ /dev/null @@ -1,48 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include - - -namespace DB -{ - -class Context; - -/* hdfs(URI, [format, structure, compression]) - creates a temporary storage from hdfs 
files - * - */ -class TableFunctionHDFS : public ITableFunctionFileLike -{ -public: - static constexpr auto name = "hdfs"; - static constexpr auto signature = " - uri\n" - " - uri, format\n" - " - uri, format, structure\n" - " - uri, format, structure, compression_method\n"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - - ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - -private: - StoragePtr getStorage( - const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, - const std::string & table_name, const String & compression_method_) const override; - const char * getStorageTypeName() const override { return "HDFS"; } -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHDFSCluster.cpp b/src/TableFunctions/TableFunctionHDFSCluster.cpp deleted file mode 100644 index 57ce6d2b9ff..00000000000 --- a/src/TableFunctions/TableFunctionHDFSCluster.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "config.h" - -#if USE_HDFS - -#include -#include - -#include -#include -#include "registerTableFunctions.h" - -#include - - -namespace DB -{ - -StoragePtr TableFunctionHDFSCluster::getStorage( - const String & /*source*/, const String & /*format_*/, const ColumnsDescription & columns, ContextPtr context, - const std::string & table_name, const String & /*compression_method_*/) const -{ - StoragePtr storage; - if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) - { - /// On worker node this uri won't contains globs - storage = std::make_shared( - filename, - StorageID(getDatabaseName(), table_name), - format, - columns, - ConstraintsDescription{}, - String{}, - context, - compression_method, - /*distributed_processing=*/true, - nullptr); - } - else - { - storage = std::make_shared( - context, - cluster_name, - filename, - StorageID(getDatabaseName(), table_name), - format, - columns, - ConstraintsDescription{}, - compression_method); - } - return storage; -} - -void registerTableFunctionHDFSCluster(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHDFSCluster.h b/src/TableFunctions/TableFunctionHDFSCluster.h deleted file mode 100644 index 0253217feb7..00000000000 --- a/src/TableFunctions/TableFunctionHDFSCluster.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include -#include -#include - - -namespace DB -{ - -class Context; - -/** - * hdfsCluster(cluster, URI, format, structure, compression_method) - * A table function, which allows to process many files from HDFS on a specific cluster - * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks - * in HDFS file path and dispatch each file dynamically. - * On worker node it asks initiator about next task to process, processes it. - * This is repeated until the tasks are finished. 
- */ -class TableFunctionHDFSCluster : public ITableFunctionCluster -{ -public: - static constexpr auto name = "hdfsCluster"; - static constexpr auto signature = " - cluster_name, uri\n" - " - cluster_name, uri, format\n" - " - cluster_name, uri, format, structure\n" - " - cluster_name, uri, format, structure, compression_method\n"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - -protected: - StoragePtr getStorage( - const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, - const std::string & table_name, const String & compression_method_) const override; - - const char * getStorageTypeName() const override { return "HDFSCluster"; } -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHudi.cpp b/src/TableFunctions/TableFunctionHudi.cpp deleted file mode 100644 index 436e708b72d..00000000000 --- a/src/TableFunctions/TableFunctionHudi.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include "registerTableFunctions.h" - -namespace DB -{ - -struct TableFunctionHudiName -{ - static constexpr auto name = "hudi"; -}; -using TableFunctionHudi = ITableFunctionDataLake; - -void registerTableFunctionHudi(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the Hudi table stored on object store.)", - .examples{{"hudi", "SELECT * FROM hudi(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} -} - -#endif diff --git a/src/TableFunctions/TableFunctionIceberg.cpp b/src/TableFunctions/TableFunctionIceberg.cpp deleted file mode 100644 index d37aace01c6..00000000000 --- a/src/TableFunctions/TableFunctionIceberg.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 && USE_AVRO - -#include -#include -#include -#include -#include "registerTableFunctions.h" - - -namespace DB -{ - -struct TableFunctionIcebergName -{ - static constexpr auto name = "iceberg"; -}; - -using TableFunctionIceberg = ITableFunctionDataLake; - -void registerTableFunctionIceberg(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the Iceberg table stored on object store.)", - .examples{{"iceberg", "SELECT * FROM iceberg(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp new file mode 100644 index 00000000000..550d9cc799b --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -0,0 +1,226 @@ +#include "config.h" + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +template +ObjectStoragePtr TableFunctionObjectStorage::getObjectStorage(const ContextPtr & context, bool create_readonly) const +{ + if (!object_storage) + object_storage = configuration->createObjectStorage(context, create_readonly); + return object_storage; +} + +template +StorageObjectStorage::ConfigurationPtr TableFunctionObjectStorage::getConfiguration() const +{ + if (!configuration) + configuration = 
std::make_shared(); + return configuration; +} + +template +std::vector TableFunctionObjectStorage::skipAnalysisForArguments( + const QueryTreeNodePtr & query_node_table_function, ContextPtr) const +{ + auto & table_function_node = query_node_table_function->as(); + auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); + size_t table_function_arguments_size = table_function_arguments_nodes.size(); + + std::vector result; + for (size_t i = 0; i < table_function_arguments_size; ++i) + { + auto * function_node = table_function_arguments_nodes[i]->as(); + if (function_node && function_node->getFunctionName() == "headers") + result.push_back(i); + } + return result; +} + +template +void TableFunctionObjectStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + /// Clone ast function, because we can modify its arguments like removing headers. + auto ast_copy = ast_function->clone(); + ASTs & args_func = ast_copy->children; + if (args_func.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); + + auto & args = args_func.at(0)->children; + parseArgumentsImpl(args, context); +} + +template +ColumnsDescription TableFunctionObjectStorage< + Definition, Configuration>::getActualTableStructure(ContextPtr context, bool is_insert_query) const +{ + if (configuration->structure == "auto") + { + context->checkAccess(getSourceAccessType()); + ColumnsDescription columns; + auto storage = getObjectStorage(context, !is_insert_query); + resolveSchemaAndFormat(columns, configuration->format, storage, configuration, std::nullopt, context); + return columns; + } + else + return parseColumnsListFromString(configuration->structure, context); +} + +template +StoragePtr TableFunctionObjectStorage::executeImpl( + const ASTPtr & /* ast_function */, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const +{ + ColumnsDescription columns; + chassert(configuration); + if (configuration->structure != "auto") + columns = parseColumnsListFromString(configuration->structure, context); + else if (!structure_hint.empty()) + columns = structure_hint; + else if (!cached_columns.empty()) + columns = cached_columns; + + StoragePtr storage = std::make_shared( + configuration, + getObjectStorage(context, !is_insert_query), + context, + StorageID(getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + String{}, + /* format_settings */std::nullopt, + /* distributed_processing */false, + nullptr); + + storage->startup(); + return storage; +} + +void registerTableFunctionObjectStorage(TableFunctionFactory & factory) +{ + UNUSED(factory); +#if USE_AWS_S3 + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on AWS S3.)", + .examples{{"s3", "SELECT * FROM s3(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, + .allow_readonly = false + }); + + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on GCS.)", + .examples{{"gcs", "SELECT * FROM gcs(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, + .allow_readonly = false + }); + + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on COSN.)", + .examples{{"cosn", "SELECT * FROM cosn(url, access_key_id, 
secret_access_key)", ""} + }, + .categories{"DataLake"}}, + .allow_readonly = false + }); + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on OSS.)", + .examples{{"oss", "SELECT * FROM oss(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, + .allow_readonly = false + }); +#endif + +#if USE_AZURE_BLOB_STORAGE + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on Azure Blob Storage.)", + .examples{ + { + "azureBlobStorage", + "SELECT * FROM azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, " + "[account_name, account_key, format, compression, structure])", "" + }} + }, + .allow_readonly = false + }); +#endif +#if USE_HDFS + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on HDFS virtual filesystem.)", + .examples{ + { + "hdfs", + "SELECT * FROM hdfs(url, format, compression, structure])", "" + }} + }, + .allow_readonly = false + }); +#endif +} + +#if USE_AZURE_BLOB_STORAGE +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +#endif + +#if USE_AWS_S3 +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +#endif + +#if USE_HDFS +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +#endif + +} diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h new file mode 100644 index 00000000000..86b8f0d5e14 --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -0,0 +1,172 @@ +#pragma once + +#include "config.h" +#include +#include +#include +#include +#include + +namespace DB +{ + +class Context; +class StorageS3Configuration; +class StorageAzureConfiguration; +class StorageHDFSConfiguration; +struct S3StorageSettings; +struct AzureStorageSettings; +struct HDFSStorageSettings; + +struct AzureDefinition +{ + static constexpr auto name = "azureBlobStorage"; + static constexpr auto storage_type_name = "Azure"; + static constexpr auto signature = " - connection_string, container_name, blobpath\n" + " - connection_string, container_name, blobpath, structure \n" + " - connection_string, container_name, blobpath, format \n" + " - connection_string, container_name, blobpath, format, compression \n" + " - connection_string, container_name, blobpath, format, compression, structure \n" + " - storage_account_url, container_name, blobpath, account_name, account_key\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"; + static constexpr auto max_number_of_arguments = 8; +}; + +struct S3Definition +{ + static constexpr auto name = "s3"; + static constexpr auto storage_type_name = "S3"; + static constexpr auto signature = " - url\n" + " - url, format\n" + " - url, format, structure\n" + " - url, format, structure, compression_method\n" + " - url, access_key_id, 
secret_access_key\n" + " - url, access_key_id, secret_access_key, session_token\n" + " - url, access_key_id, secret_access_key, format\n" + " - url, access_key_id, secret_access_key, session_token, format\n" + " - url, access_key_id, secret_access_key, format, structure\n" + " - url, access_key_id, secret_access_key, session_token, format, structure\n" + " - url, access_key_id, secret_access_key, format, structure, compression_method\n" + " - url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" + "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; + static constexpr auto max_number_of_arguments = 8; +}; + +struct GCSDefinition +{ + static constexpr auto name = "gcs"; + static constexpr auto storage_type_name = "GCS"; + static constexpr auto signature = S3Definition::signature; + static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments; +}; + +struct COSNDefinition +{ + static constexpr auto name = "cosn"; + static constexpr auto storage_type_name = "COSN"; + static constexpr auto signature = S3Definition::signature; + static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments; +}; + +struct OSSDefinition +{ + static constexpr auto name = "oss"; + static constexpr auto storage_type_name = "OSS"; + static constexpr auto signature = S3Definition::signature; + static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments; +}; + +struct HDFSDefinition +{ + static constexpr auto name = "hdfs"; + static constexpr auto storage_type_name = "HDFS"; + static constexpr auto signature = " - uri\n" + " - uri, format\n" + " - uri, format, structure\n" + " - uri, format, structure, compression_method\n"; + static constexpr auto max_number_of_arguments = 4; +}; + +template +class TableFunctionObjectStorage : public ITableFunction +{ +public: + static constexpr auto name = Definition::name; + static constexpr auto signature = Definition::signature; + + static size_t getMaxNumberOfArguments() { return Definition::max_number_of_arguments; } + + String getName() const override { return name; } + + virtual String getSignature() const { return signature; } + + bool hasStaticStructure() const override { return configuration->structure != "auto"; } + + bool needStructureHint() const override { return configuration->structure == "auto"; } + + void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } + + bool supportsReadingSubsetOfColumns(const ContextPtr & context) override + { + return configuration->format != "auto" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); + } + + std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override + { + return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); + } + + virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context) + { + StorageObjectStorage::Configuration::initialize(*getConfiguration(), args, context, true); + } + + static void updateStructureAndFormatArgumentsIfNeeded( + ASTs & args, + const String & structure, + const String & format, + const ContextPtr & context) + { + Configuration().addStructureAndFormatToArgs(args, structure, format, context); + } + +protected: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + 
ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return Definition::storage_type_name; } + + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + + ObjectStoragePtr getObjectStorage(const ContextPtr & context, bool create_readonly) const; + ConfigurationPtr getConfiguration() const; + + mutable ConfigurationPtr configuration; + mutable ObjectStoragePtr object_storage; + ColumnsDescription structure_hint; + + std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; +}; + +#if USE_AWS_S3 +using TableFunctionS3 = TableFunctionObjectStorage; +#endif + +#if USE_AZURE_BLOB_STORAGE +using TableFunctionAzureBlob = TableFunctionObjectStorage; +#endif + +#if USE_HDFS +using TableFunctionHDFS = TableFunctionObjectStorage; +#endif +} diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp new file mode 100644 index 00000000000..449bd2c8c49 --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -0,0 +1,118 @@ +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +template +StoragePtr TableFunctionObjectStorageCluster::executeImpl( + const ASTPtr & /*function*/, ContextPtr context, + const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const +{ + auto configuration = Base::getConfiguration(); + + ColumnsDescription columns; + if (configuration->structure != "auto") + columns = parseColumnsListFromString(configuration->structure, context); + else if (!Base::structure_hint.empty()) + columns = Base::structure_hint; + else if (!cached_columns.empty()) + columns = cached_columns; + + auto object_storage = Base::getObjectStorage(context, !is_insert_query); + StoragePtr storage; + if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) + { + /// On worker node this filename won't contains globs + storage = std::make_shared( + configuration, + object_storage, + context, + StorageID(Base::getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + /* comment */String{}, + /* format_settings */std::nullopt, /// No format_settings + /* distributed_processing */true, + /*partition_by_=*/nullptr); + } + else + { + storage = std::make_shared( + ITableFunctionCluster::cluster_name, + configuration, + object_storage, + StorageID(Base::getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + context); + } + + storage->startup(); + return storage; +} + + +void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory) +{ +#if USE_AWS_S3 + factory.registerFunction( + { + .documentation = { + .description=R"(The table function can be used to read the data stored on S3 in parallel for many nodes in a specified cluster.)", + .examples{{"s3Cluster", "SELECT * FROM s3Cluster(cluster, url, format, structure)", ""}}}, + .allow_readonly = false + } + ); +#endif + +#if USE_AZURE_BLOB_STORAGE + factory.registerFunction( + { + .documentation = { + .description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", + .examples{{ + "azureBlobStorageCluster", + "SELECT * FROM 
azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, " + "[account_name, account_key, format, compression, structure])", ""}}}, + .allow_readonly = false + } + ); +#endif + +#if USE_HDFS + factory.registerFunction( + { + .documentation = { + .description=R"(The table function can be used to read the data stored on HDFS in parallel for many nodes in a specified cluster.)", + .examples{{"HDFSCluster", "SELECT * FROM HDFSCluster(cluster_name, uri, format)", ""}}}, + .allow_readonly = false + } + ); +#endif + + UNUSED(factory); +} + +#if USE_AWS_S3 +template class TableFunctionObjectStorageCluster; +#endif + +#if USE_AZURE_BLOB_STORAGE +template class TableFunctionObjectStorageCluster; +#endif + +#if USE_HDFS +template class TableFunctionObjectStorageCluster; +#endif +} diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.h b/src/TableFunctions/TableFunctionObjectStorageCluster.h new file mode 100644 index 00000000000..296791b8bda --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.h @@ -0,0 +1,102 @@ +#pragma once +#include "config.h" +#include +#include +#include + + +namespace DB +{ + +class Context; + +class StorageS3Settings; +class StorageAzureBlobSettings; +class StorageS3Configuration; +class StorageAzureConfiguration; + +struct AzureClusterDefinition +{ + static constexpr auto name = "azureBlobStorageCluster"; + static constexpr auto storage_type_name = "AzureBlobStorageCluster"; + static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]"; + static constexpr auto max_number_of_arguments = AzureDefinition::max_number_of_arguments + 1; +}; + +struct S3ClusterDefinition +{ + static constexpr auto name = "s3Cluster"; + static constexpr auto storage_type_name = "S3Cluster"; + static constexpr auto signature = " - cluster, url\n" + " - cluster, url, format\n" + " - cluster, url, format, structure\n" + " - cluster, url, access_key_id, secret_access_key\n" + " - cluster, url, format, structure, compression_method\n" + " - cluster, url, access_key_id, secret_access_key, format\n" + " - cluster, url, access_key_id, secret_access_key, format, structure\n" + " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n" + " - cluster, url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" + "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; + static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments + 1; +}; + +struct HDFSClusterDefinition +{ + static constexpr auto name = "hdfsCluster"; + static constexpr auto storage_type_name = "HDFSCluster"; + static constexpr auto signature = " - cluster_name, uri\n" + " - cluster_name, uri, format\n" + " - cluster_name, uri, format, structure\n" + " - cluster_name, uri, format, structure, compression_method\n"; + static constexpr auto max_number_of_arguments = HDFSDefinition::max_number_of_arguments + 1; +}; + +/** +* Class implementing s3/hdfs/azureBlobStorage)Cluster(...) table functions, +* which allow to process many files from S3/HDFS/Azure blob storage on a specific cluster. +* On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks +* in file path and dispatch each file dynamically. +* On worker node it asks initiator about next task to process, processes it. 
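
The *ClusterDefinition structs above add exactly one argument (the cluster name) on top of their single-node counterparts, and all behaviour lives in one template parameterised over such a definition. A minimal standalone sketch of that traits pattern, assuming hypothetical FooDefinition/FooClusterDefinition names that are not part of this patch:

```cpp
#include <iostream>
#include <string>

// Hypothetical definition structs: compile-time metadata only, no behaviour.
struct FooDefinition
{
    static constexpr auto name = "foo";
    static constexpr auto max_number_of_arguments = 4;
};

struct FooClusterDefinition
{
    static constexpr auto name = "fooCluster";
    // The cluster variant takes one extra leading argument: the cluster name.
    static constexpr auto max_number_of_arguments = FooDefinition::max_number_of_arguments + 1;
};

// One template provides the shared behaviour for every definition.
template <typename Definition>
struct TableFunctionSketch
{
    static std::string describe()
    {
        return std::string(Definition::name) + " takes at most "
            + std::to_string(Definition::max_number_of_arguments) + " arguments";
    }
};

int main()
{
    std::cout << TableFunctionSketch<FooDefinition>::describe() << '\n';
    std::cout << TableFunctionSketch<FooClusterDefinition>::describe() << '\n';
}
```

In the patch itself the same mechanism supplies the registered function name, the signature string and the argument limit for every storage flavour.
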
+* This is repeated until the tasks are finished. +*/ +template +class TableFunctionObjectStorageCluster : public ITableFunctionCluster> +{ +public: + static constexpr auto name = Definition::name; + static constexpr auto signature = Definition::signature; + + String getName() const override { return name; } + String getSignature() const override { return signature; } + +protected: + using Base = TableFunctionObjectStorage; + + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return Definition::storage_type_name; } + + bool hasStaticStructure() const override { return Base::getConfiguration()->structure != "auto"; } + + bool needStructureHint() const override { return Base::getConfiguration()->structure == "auto"; } + + void setStructureHint(const ColumnsDescription & structure_hint_) override { Base::structure_hint = structure_hint_; } +}; + +#if USE_AWS_S3 +using TableFunctionS3Cluster = TableFunctionObjectStorageCluster; +#endif + +#if USE_AZURE_BLOB_STORAGE +using TableFunctionAzureBlobCluster = TableFunctionObjectStorageCluster; +#endif + +#if USE_HDFS +using TableFunctionHDFSCluster = TableFunctionObjectStorageCluster; +#endif +} diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp deleted file mode 100644 index dfb427a3bba..00000000000 --- a/src/TableFunctions/TableFunctionS3.cpp +++ /dev/null @@ -1,518 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "registerTableFunctions.h" -#include -#include - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int LOGICAL_ERROR; -} - - -std::vector TableFunctionS3::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const -{ - auto & table_function_node = query_node_table_function->as(); - auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); - size_t table_function_arguments_size = table_function_arguments_nodes.size(); - - std::vector result; - - for (size_t i = 0; i < table_function_arguments_size; ++i) - { - auto * function_node = table_function_arguments_nodes[i]->as(); - if (function_node && function_node->getFunctionName() == "headers") - result.push_back(i); - } - - return result; -} - -/// This is needed to avoid copy-paste. 
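
The copy-paste the comment refers to is avoided by stripping the leading cluster-name argument and handing the remainder to the single-node parser. A simplified, self-contained illustration of that delegation; parseArgs and parseClusterArgs are illustrative stand-ins, not functions from this patch:

```cpp
#include <cassert>
#include <string>
#include <vector>

struct Config
{
    std::string cluster;
    std::string url;
    std::string format;
};

// Single-node parser: url [, format]
static void parseArgs(const std::vector<std::string> & args, Config & cfg)
{
    cfg.url = args.at(0);
    cfg.format = args.size() > 1 ? args[1] : "auto";
}

// Cluster parser: consume the extra leading cluster name, then reuse parseArgs.
static void parseClusterArgs(std::vector<std::string> args, Config & cfg)
{
    cfg.cluster = args.at(0);
    args.erase(args.begin());
    parseArgs(args, cfg);
}

int main()
{
    Config cfg;
    parseClusterArgs({"my_cluster", "https://bucket/file.csv", "CSV"}, cfg);
    assert(cfg.cluster == "my_cluster" && cfg.url == "https://bucket/file.csv" && cfg.format == "CSV");
}
```
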
Because s3Cluster arguments only differ in additional argument (first) - cluster name -void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context) -{ - if (auto named_collection = tryGetNamedCollectionWithOverrides(args, context)) - { - StorageS3::processNamedCollectionResult(configuration, *named_collection); - if (configuration.format == "auto") - { - String file_path = named_collection->getOrDefault("filename", Poco::URI(named_collection->get("url")).getPath()); - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(file_path).value_or("auto"); - } - } - else - { - size_t count = StorageURL::evalArgsAndCollectHeaders(args, configuration.headers_from_ast, context); - - if (count == 0 || count > 7) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); - - std::unordered_map args_to_idx; - - bool no_sign_request = false; - - /// For 2 arguments we support 2 possible variants: - /// - s3(source, format) - /// - s3(source, NOSIGN) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - if (count == 2) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - no_sign_request = true; - else - args_to_idx = {{"format", 1}}; - } - /// For 3 arguments we support 3 possible variants: - /// - s3(source, format, structure) - /// - s3(source, access_key_id, secret_access_key) - /// - s3(source, NOSIGN, format) - /// We can distinguish them by looking at the 2-nd argument: check if it's a format name or not. - else if (count == 3) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - args_to_idx = {{"format", 2}}; - } - else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) - args_to_idx = {{"format", 1}, {"structure", 2}}; - else - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; - } - /// For 4 arguments we support 4 possible variants: - /// - s3(source, format, structure, compression_method), - /// - s3(source, access_key_id, secret_access_key, format), - /// - s3(source, access_key_id, secret_access_key, session_token) - /// - s3(source, NOSIGN, format, structure) - /// We can distinguish them by looking at the 2-nd and 4-th argument: check if it's a format name or not. 
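
The branching documented in these comments rests on two probes: whether the second argument is the NOSIGN keyword (compared case-insensitively) and whether an argument names a known format. A standalone sketch of the three-argument case, with isKnownFormat standing in for FormatFactory::instance().exists():

```cpp
#include <algorithm>
#include <cctype>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Case-insensitive check for the NOSIGN keyword.
static bool isNoSign(std::string s)
{
    std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
    return s == "NOSIGN";
}

// Illustrative stand-in for FormatFactory::instance().exists().
static bool isKnownFormat(const std::string & s)
{
    return s == "auto" || s == "CSV" || s == "Parquet" || s == "JSONEachRow";
}

// Role assignment for exactly three arguments, mirroring the comment:
//   s3(source, format, structure) | s3(source, access_key_id, secret_access_key) | s3(source, NOSIGN, format)
static std::map<std::string, size_t> rolesForThreeArgs(const std::vector<std::string> & args)
{
    if (isNoSign(args.at(1)))
        return {{"format", 2}};
    if (isKnownFormat(args[1]))
        return {{"format", 1}, {"structure", 2}};
    return {{"access_key_id", 1}, {"secret_access_key", 2}};
}

int main()
{
    for (const auto & [role, pos] : rolesForThreeArgs({"https://bucket/data.csv", "CSV", "id UInt64"}))
        std::cout << role << " -> argument #" << pos << '\n';
}
```

The four- and five-argument branches extend the same idea by additionally probing the fourth argument for a format name.
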
- else if (count == 4) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - args_to_idx = {{"format", 2}, {"structure", 3}}; - } - else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) - { - args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; - } - else - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; - } - else - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; - } - } - } - /// For 5 arguments we support 3 possible variants: - /// - s3(source, access_key_id, secret_access_key, format, structure) - /// - s3(source, access_key_id, secret_access_key, session_token, format) - /// - s3(source, NOSIGN, format, structure, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or no, - /// and by the 4-th argument, check if it's a format name or not - else if (count == 5) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "NOSIGN/access_key_id"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - args_to_idx = {{"format", 2}, {"structure", 3}, {"compression_method", 4}}; - } - else - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; - } - else - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; - } - } - } - // For 6 arguments we support 2 possible variants: - /// - s3(source, access_key_id, secret_access_key, format, structure, compression_method) - /// - s3(source, access_key_id, secret_access_key, session_token, format, structure) - /// We can distinguish them by looking at the 4-th argument: check if it's a format name or not - else if (count == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; - } - else - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}}; - } - } - else if (count == 7) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}; - } - - /// This argument is always the first - String url = checkAndGetLiteralArgument(args[0], "url"); - configuration.url = S3::URI(url); - - if (args_to_idx.contains("format")) - { - auto format = checkAndGetLiteralArgument(args[args_to_idx["format"]], "format"); - /// Set format to configuration only of it's not 'auto', - /// because we can have default format set in configuration. 
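
Two details of this argument handling are easy to miss: an explicit 'auto' never overwrites a format that is already configured (for example through a named collection), and a still-unresolved format is inferred from the file name as a last resort. A self-contained sketch of that resolution order, where formatFromFileName is only a stand-in for the FormatFactory file-name lookup:

```cpp
#include <iostream>
#include <optional>
#include <string>

// Illustrative stand-in for FormatFactory::instance().tryGetFormatFromFileName():
// guess the format from the file extension, if it is recognised.
static std::optional<std::string> formatFromFileName(const std::string & path)
{
    auto dot = path.find_last_of('.');
    if (dot == std::string::npos)
        return std::nullopt;
    const std::string ext = path.substr(dot);
    if (ext == ".csv")
        return "CSV";
    if (ext == ".parquet")
        return "Parquet";
    return std::nullopt;
}

// Keep the configured default unless the argument really overrides it,
// then fall back to the file name, and finally to the 'auto' placeholder.
static std::string resolveFormat(const std::string & format_arg, const std::string & configured, const std::string & path)
{
    std::string format = configured;
    if (format_arg != "auto")
        format = format_arg;
    if (format == "auto")
        format = formatFromFileName(path).value_or("auto");
    return format;
}

int main()
{
    std::cout << resolveFormat("auto", "auto", "s3://bucket/events.parquet") << '\n';   // Parquet
    std::cout << resolveFormat("CSV", "auto", "s3://bucket/events.parquet") << '\n';    // CSV
}
```
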
- if (format != "auto") - configuration.format = format; - } - - if (args_to_idx.contains("structure")) - configuration.structure = checkAndGetLiteralArgument(args[args_to_idx["structure"]], "structure"); - - if (args_to_idx.contains("compression_method")) - configuration.compression_method = checkAndGetLiteralArgument(args[args_to_idx["compression_method"]], "compression_method"); - - if (args_to_idx.contains("access_key_id")) - configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(args[args_to_idx["access_key_id"]], "access_key_id"); - - if (args_to_idx.contains("secret_access_key")) - configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(args[args_to_idx["secret_access_key"]], "secret_access_key"); - - if (args_to_idx.contains("session_token")) - configuration.auth_settings.session_token = checkAndGetLiteralArgument(args[args_to_idx["session_token"]], "session_token"); - - configuration.auth_settings.no_sign_request = no_sign_request; - - if (configuration.format == "auto") - { - if (configuration.url.archive_pattern.has_value()) - { - configuration.format = FormatFactory::instance() - .tryGetFormatFromFileName(Poco::URI(configuration.url.archive_pattern.value()).getPath()) - .value_or("auto"); - } - else - { - configuration.format - = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(configuration.url.uri_str).getPath()).value_or("auto"); - } - } - } - - configuration.keys = {configuration.url.key}; -} - -void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr context) -{ - /// Clone ast function, because we can modify its arguments like removing headers. - auto ast_copy = ast_function->clone(); - - /// Parse args - ASTs & args_func = ast_function->children; - - if (args_func.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); - - auto & args = args_func.at(0)->children; - - parseArgumentsImpl(args, context); -} - -void TableFunctionS3::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context) -{ - if (auto collection = tryGetNamedCollectionWithOverrides(args, context)) - { - /// In case of named collection, just add key-value pairs "format='...', structure='...'" - /// at the end of arguments to override existed format and structure with "auto" values. 
- if (collection->getOrDefault("format", "auto") == "auto") - { - ASTs format_equal_func_args = {std::make_shared("format"), std::make_shared(format)}; - auto format_equal_func = makeASTFunction("equals", std::move(format_equal_func_args)); - args.push_back(format_equal_func); - } - if (collection->getOrDefault("structure", "auto") == "auto") - { - ASTs structure_equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; - auto structure_equal_func = makeASTFunction("equals", std::move(structure_equal_func_args)); - args.push_back(structure_equal_func); - } - } - else - { - HTTPHeaderEntries tmp_headers; - size_t count = StorageURL::evalArgsAndCollectHeaders(args, tmp_headers, context); - - if (count == 0 || count > getMaxNumberOfArguments()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), count); - - auto format_literal = std::make_shared(format); - auto structure_literal = std::make_shared(structure); - - /// s3(s3_url) -> s3(s3_url, format, structure) - if (count == 1) - { - args.push_back(format_literal); - args.push_back(structure_literal); - } - /// s3(s3_url, format) -> s3(s3_url, format, structure) or - /// s3(s3_url, NOSIGN) -> s3(s3_url, NOSIGN, format, structure) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - else if (count == 2) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - args.push_back(format_literal); - else if (second_arg == "auto") - args.back() = format_literal; - args.push_back(structure_literal); - } - /// s3(source, format, structure) or - /// s3(source, access_key_id, secret_access_key) or - /// s3(source, NOSIGN, format) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. - else if (count == 3) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - /// s3(source, NOSIGN, format) -> s3(source, NOSIGN, format, structure) - if (boost::iequals(second_arg, "NOSIGN")) - { - if (checkAndGetLiteralArgument(args[2], "format") == "auto") - args.back() = format_literal; - args.push_back(structure_literal); - } - /// s3(source, format, structure) - else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) - { - if (second_arg == "auto") - args[1] = format_literal; - if (checkAndGetLiteralArgument(args[2], "structure") == "auto") - args[2] = structure_literal; - } - /// s3(source, access_key_id, access_key_id) -> s3(source, access_key_id, access_key_id, format, structure) - else - { - args.push_back(format_literal); - args.push_back(structure_literal); - } - } - /// s3(source, format, structure, compression_method) or - /// s3(source, access_key_id, secret_access_key, format) or - /// s3(source, NOSIGN, format, structure) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. 
- else if (count == 4) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - /// s3(source, NOSIGN, format, structure) - if (boost::iequals(second_arg, "NOSIGN")) - { - if (checkAndGetLiteralArgument(args[2], "format") == "auto") - args[2] = format_literal; - if (checkAndGetLiteralArgument(args[3], "structure") == "auto") - args[3] = structure_literal; - } - /// s3(source, format, structure, compression_method) - else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) - { - if (second_arg == "auto") - args[1] = format_literal; - if (checkAndGetLiteralArgument(args[2], "structure") == "auto") - args[2] = structure_literal; - } - /// s3(source, access_key_id, access_key_id, format) -> s3(source, access_key_id, access_key_id, format, structure) - else - { - if (checkAndGetLiteralArgument(args[3], "format") == "auto") - args[3] = format_literal; - args.push_back(structure_literal); - } - } - /// s3(source, access_key_id, secret_access_key, format, structure) or - /// s3(source, NOSIGN, format, structure, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not. - else if (count == 5) - { - auto sedond_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - /// s3(source, NOSIGN, format, structure, compression_method) - if (boost::iequals(sedond_arg, "NOSIGN")) - { - if (checkAndGetLiteralArgument(args[2], "format") == "auto") - args[2] = format_literal; - if (checkAndGetLiteralArgument(args[3], "structure") == "auto") - args[3] = structure_literal; - } - /// s3(source, access_key_id, access_key_id, format, structure) - else - { - if (checkAndGetLiteralArgument(args[3], "format") == "auto") - args[3] = format_literal; - if (checkAndGetLiteralArgument(args[4], "structure") == "auto") - args[4] = structure_literal; - } - } - /// s3(source, access_key_id, secret_access_key, format, structure, compression) - else if (count == 6) - { - if (checkAndGetLiteralArgument(args[3], "format") == "auto") - args[3] = format_literal; - if (checkAndGetLiteralArgument(args[4], "structure") == "auto") - args[4] = structure_literal; - } - } -} - -ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const -{ - if (configuration.structure == "auto") - { - context->checkAccess(getSourceAccessType()); - configuration.update(context); - if (configuration.format == "auto") - return StorageS3::getTableStructureAndFormatFromData(configuration, std::nullopt, context).first; - - return StorageS3::getTableStructureFromData(configuration, std::nullopt, context); - } - - return parseColumnsListFromString(configuration.structure, context); -} - -bool TableFunctionS3::supportsReadingSubsetOfColumns(const ContextPtr & context) -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context); -} - -std::unordered_set TableFunctionS3::getVirtualsToCheckBeforeUsingStructureHint() const -{ - return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); -} - -StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool /*is_insert_query*/) const -{ - S3::URI s3_uri (configuration.url); - - ColumnsDescription columns; - if (configuration.structure != "auto") - columns = parseColumnsListFromString(configuration.structure, context); - else if (!structure_hint.empty()) - columns = structure_hint; - else if 
(!cached_columns.empty()) - columns = cached_columns; - - StoragePtr storage = std::make_shared( - configuration, - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - String{}, - /// No format_settings for table function S3 - std::nullopt); - - storage->startup(); - - return storage; -} - - -class TableFunctionGCS : public TableFunctionS3 -{ -public: - static constexpr auto name = "gcs"; - std::string getName() const override - { - return name; - } -private: - const char * getStorageTypeName() const override { return "GCS"; } -}; - -class TableFunctionCOS : public TableFunctionS3 -{ -public: - static constexpr auto name = "cosn"; - std::string getName() const override - { - return name; - } -private: - const char * getStorageTypeName() const override { return "COSN"; } -}; - -class TableFunctionOSS : public TableFunctionS3 -{ -public: - static constexpr auto name = "oss"; - std::string getName() const override - { - return name; - } -private: - const char * getStorageTypeName() const override { return "OSS"; } -}; - - -void registerTableFunctionGCS(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on Google Cloud Storage.)", - .examples{{"gcs", "SELECT * FROM gcs(url, hmac_key, hmac_secret)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} - -void registerTableFunctionS3(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on AWS S3.)", - .examples{{"s3", "SELECT * FROM s3(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} - - -void registerTableFunctionCOS(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -void registerTableFunctionOSS(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h deleted file mode 100644 index 00ca36c6653..00000000000 --- a/src/TableFunctions/TableFunctionS3.h +++ /dev/null @@ -1,86 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include - - -namespace DB -{ - -class Context; - -/* s3(source, [access_key_id, secret_access_key,] [format, structure, compression]) - creates a temporary storage for a file in S3. 
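
Both the new TableFunctionObjectStorage::executeImpl and the implementation removed here pick the column description from the first available source: an explicit structure argument, then the structure hint if one was set, then columns cached from a previous resolution, and only after that schema inference. A minimal sketch of that precedence, using a plain string list in place of ColumnsDescription:

```cpp
#include <iostream>
#include <string>
#include <vector>

using Columns = std::vector<std::string>;

// Pick the column list from the first available source, in priority order:
// an explicit structure argument, the structure hint, and finally columns
// cached from an earlier schema resolution.
static Columns chooseColumns(const std::string & structure_arg, const Columns & structure_hint, const Columns & cached_columns)
{
    if (structure_arg != "auto")
        return {structure_arg};   // in the real code this string is parsed into a ColumnsDescription
    if (!structure_hint.empty())
        return structure_hint;
    if (!cached_columns.empty())
        return cached_columns;
    return {};                    // empty: the schema will be inferred later
}

int main()
{
    const Columns hint = {"id UInt64", "value String"};
    for (const auto & column : chooseColumns("auto", hint, /*cached_columns=*/{}))
        std::cout << column << '\n';
}
```
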
- */ -class TableFunctionS3 : public ITableFunction -{ -public: - static constexpr auto name = "s3"; - static constexpr auto signature = " - url\n" - " - url, format\n" - " - url, format, structure\n" - " - url, format, structure, compression_method\n" - " - url, access_key_id, secret_access_key\n" - " - url, access_key_id, secret_access_key, session_token\n" - " - url, access_key_id, secret_access_key, format\n" - " - url, access_key_id, secret_access_key, session_token, format\n" - " - url, access_key_id, secret_access_key, format, structure\n" - " - url, access_key_id, secret_access_key, session_token, format, structure\n" - " - url, access_key_id, secret_access_key, format, structure, compression_method\n" - " - url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" - "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; - - static size_t getMaxNumberOfArguments() { return 6; } - - String getName() const override - { - return name; - } - - virtual String getSignature() const - { - return signature; - } - - bool hasStaticStructure() const override { return configuration.structure != "auto"; } - - bool needStructureHint() const override { return configuration.structure == "auto"; } - - void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - - bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; - - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - - virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - - static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context); - -protected: - - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "S3"; } - - ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - - mutable StorageS3::Configuration configuration; - ColumnsDescription structure_hint; - -private: - - std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp deleted file mode 100644 index e727c4e4c89..00000000000 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ /dev/null @@ -1,72 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include - -#include "registerTableFunctions.h" - -#include - - -namespace DB -{ - -StoragePtr TableFunctionS3Cluster::executeImpl( - const ASTPtr & /*function*/, ContextPtr context, - const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const -{ - StoragePtr storage; - ColumnsDescription columns; - - if (configuration.structure != "auto") - { - columns = parseColumnsListFromString(configuration.structure, context); - } - else if (!structure_hint.empty()) - { - columns = structure_hint; - } - - if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) - { - /// On worker node this filename won't contains globs - storage = std::make_shared( - 
configuration, - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - /* comment */String{}, - /* format_settings */std::nullopt, /// No format_settings for S3Cluster - /*distributed_processing=*/true); - } - else - { - storage = std::make_shared( - cluster_name, - configuration, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - context); - } - - storage->startup(); - - return storage; -} - - -void registerTableFunctionS3Cluster(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - - -} - -#endif diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h deleted file mode 100644 index 718b0d90de8..00000000000 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ /dev/null @@ -1,64 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include - - -namespace DB -{ - -class Context; - -/** - * s3cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure, compression_method) - * A table function, which allows to process many files from S3 on a specific cluster - * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks - * in S3 file path and dispatch each file dynamically. - * On worker node it asks initiator about next task to process, processes it. - * This is repeated until the tasks are finished. - */ -class TableFunctionS3Cluster : public ITableFunctionCluster -{ -public: - static constexpr auto name = "s3Cluster"; - static constexpr auto signature = " - cluster, url\n" - " - cluster, url, format\n" - " - cluster, url, format, structure\n" - " - cluster, url, access_key_id, secret_access_key\n" - " - cluster, url, format, structure, compression_method\n" - " - cluster, url, access_key_id, secret_access_key, format\n" - " - cluster, url, access_key_id, secret_access_key, format, structure\n" - " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n" - " - cluster, url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" - "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - -protected: - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "S3Cluster"; } -}; - -} - -#endif diff --git a/src/TableFunctions/registerDataLakeTableFunctions.cpp b/src/TableFunctions/registerDataLakeTableFunctions.cpp new file mode 100644 index 00000000000..15a6668f434 --- /dev/null +++ b/src/TableFunctions/registerDataLakeTableFunctions.cpp @@ -0,0 +1,69 @@ +#include +#include + +namespace DB +{ + +#if USE_AWS_S3 +#if USE_AVRO +void registerTableFunctionIceberg(TableFunctionFactory & factory) +{ + factory.registerFunction( + { + .documentation = + { + .description=R"(The table function can be used to read the Iceberg table stored on object store.)", + .examples{{"iceberg", "SELECT * FROM iceberg(url, access_key_id, secret_access_key)", ""}}, + .categories{"DataLake"} + }, + .allow_readonly = false + }); +} +#endif + +#if USE_PARQUET +void registerTableFunctionDeltaLake(TableFunctionFactory & factory) +{ + factory.registerFunction( + { + 
.documentation = + { + .description=R"(The table function can be used to read the DeltaLake table stored on object store.)", + .examples{{"deltaLake", "SELECT * FROM deltaLake(url, access_key_id, secret_access_key)", ""}}, + .categories{"DataLake"} + }, + .allow_readonly = false + }); +} +#endif + +void registerTableFunctionHudi(TableFunctionFactory & factory) +{ + factory.registerFunction( + { + .documentation = + { + .description=R"(The table function can be used to read the Hudi table stored on object store.)", + .examples{{"hudi", "SELECT * FROM hudi(url, access_key_id, secret_access_key)", ""}}, + .categories{"DataLake"} + }, + .allow_readonly = false + }); +} +#endif + +void registerDataLakeTableFunctions(TableFunctionFactory & factory) +{ + UNUSED(factory); +#if USE_AWS_S3 +#if USE_AVRO + registerTableFunctionIceberg(factory); +#endif +#if USE_PARQUET + registerTableFunctionDeltaLake(factory); +#endif + registerTableFunctionHudi(factory); +#endif +} + +} diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 927457ff9f6..26b9a771416 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -29,27 +29,6 @@ void registerTableFunctions() registerTableFunctionFuzzJSON(factory); #endif -#if USE_AWS_S3 - registerTableFunctionS3(factory); - registerTableFunctionS3Cluster(factory); - registerTableFunctionCOS(factory); - registerTableFunctionOSS(factory); - registerTableFunctionGCS(factory); - registerTableFunctionHudi(factory); -#if USE_PARQUET - registerTableFunctionDeltaLake(factory); -#endif -#if USE_AVRO - registerTableFunctionIceberg(factory); -#endif - -#endif - -#if USE_HDFS - registerTableFunctionHDFS(factory); - registerTableFunctionHDFSCluster(factory); -#endif - #if USE_HIVE registerTableFunctionHive(factory); #endif @@ -77,12 +56,9 @@ void registerTableFunctions() registerTableFunctionFormat(factory); registerTableFunctionExplain(factory); -#if USE_AZURE_BLOB_STORAGE - registerTableFunctionAzureBlobStorage(factory); - registerTableFunctionAzureBlobStorageCluster(factory); -#endif - - + registerTableFunctionObjectStorage(factory); + registerTableFunctionObjectStorageCluster(factory); + registerDataLakeTableFunctions(factory); } } diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 296af146faf..4a89b3afbb3 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -32,18 +32,6 @@ void registerTableFunctionS3Cluster(TableFunctionFactory & factory); void registerTableFunctionCOS(TableFunctionFactory & factory); void registerTableFunctionOSS(TableFunctionFactory & factory); void registerTableFunctionGCS(TableFunctionFactory & factory); -void registerTableFunctionHudi(TableFunctionFactory & factory); -#if USE_PARQUET -void registerTableFunctionDeltaLake(TableFunctionFactory & factory); -#endif -#if USE_AVRO -void registerTableFunctionIceberg(TableFunctionFactory & factory); -#endif -#endif - -#if USE_HDFS -void registerTableFunctionHDFS(TableFunctionFactory & factory); -void registerTableFunctionHDFSCluster(TableFunctionFactory & factory); #endif #if USE_HIVE @@ -74,10 +62,9 @@ void registerTableFunctionFormat(TableFunctionFactory & factory); void registerTableFunctionExplain(TableFunctionFactory & factory); -#if USE_AZURE_BLOB_STORAGE -void registerTableFunctionAzureBlobStorage(TableFunctionFactory & factory); -void 
registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory); -#endif +void registerTableFunctionObjectStorage(TableFunctionFactory & factory); +void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory); +void registerDataLakeTableFunctions(TableFunctionFactory & factory); void registerTableFunctions(); diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 3a616c8aad6..c4e06ccd79a 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -17,7 +17,7 @@ from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union import docker_images_helper import upload_result_helper from build_check import get_release_or_pr -from ci_config import CI_CONFIG, Build, CILabels, CIStages, JobNames +from ci_config import CI_CONFIG, Build, CILabels, CIStages, JobNames, StatusNames from ci_utils import GHActions, is_hex, normalize_string from clickhouse_helper import ( CiLogsCredentials, @@ -44,14 +44,17 @@ from env_helper import ( REPORT_PATH, S3_BUILDS_BUCKET, TEMP_PATH, + GITHUB_RUN_ID, + GITHUB_REPOSITORY, ) from get_robot_token import get_best_robot_token from git_helper import GIT_PREFIX, Git from git_helper import Runner as GitRunner from github_helper import GitHub from pr_info import PRInfo -from report import ERROR, SUCCESS, BuildResult, JobReport +from report import ERROR, SUCCESS, BuildResult, JobReport, PENDING from s3_helper import S3Helper +from ci_metadata import CiMetadata from version_helper import get_version_from_repo # pylint: disable=too-many-lines @@ -66,12 +69,12 @@ class PendingState: class CiCache: """ CI cache is a bunch of records. Record is a file stored under special location on s3. - The file name has following format + The file name has a format: _[]--___.ci RECORD_TYPE: - SUCCESSFUL - for successfuly finished jobs + SUCCESSFUL - for successful jobs PENDING - for pending jobs ATTRIBUTES: @@ -503,7 +506,7 @@ class CiCache: self, job: str, batch: int, num_batches: int, release_branch: bool ) -> bool: """ - checks if a given job have already been done successfuly + checks if a given job have already been done successfully """ return self.exist( self.RecordType.SUCCESSFUL, job, batch, num_batches, release_branch @@ -744,7 +747,7 @@ class CiOptions: # list of specified jobs to run ci_jobs: Optional[List[str]] = None - # btaches to run for all multi-batch jobs + # batches to run for all multi-batch jobs job_batches: Optional[List[int]] = None do_not_test: bool = False @@ -948,7 +951,7 @@ class CiOptions: jobs_params[job] = { "batches": list(range(num_batches)), "num_batches": num_batches, - "run_if_ci_option_include_set": job_config.run_by_ci_option + "run_by_ci_option": job_config.run_by_ci_option and pr_info.is_pr, } @@ -963,10 +966,7 @@ class CiOptions: for job in jobs_to_do[:]: job_param = jobs_params[job] - if ( - job_param["run_if_ci_option_include_set"] - and job not in jobs_to_do_requested - ): + if job_param["run_by_ci_option"] and job not in jobs_to_do_requested: print( f"Erasing job '{job}' from list because it's not in included set, but will run only by include" ) @@ -991,7 +991,16 @@ def normalize_check_name(check_name: str) -> str: def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: - # FIXME: consider switching to sub_parser for configure, pre, run, post actions + parser.add_argument( + "--cancel-previous-run", + action="store_true", + help="Action that cancels previous running PR workflow if PR added into the Merge Queue", + ) + parser.add_argument( + "--set-pending-status", + action="store_true", + help="Action 
to set needed pending statuses in the beginning of CI workflow, e.g. for Sync wf", + ) parser.add_argument( "--configure", action="store_true", @@ -1000,17 +1009,19 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: parser.add_argument( "--update-gh-statuses", action="store_true", - help="Action that recreate success GH statuses for jobs that finished successfully in past and will be skipped this time", + help="Action that recreate success GH statuses for jobs that finished successfully in past and will be " + "skipped this time", ) parser.add_argument( "--pre", action="store_true", - help="Action that executes prerequesetes for the job provided in --job-name", + help="Action that executes prerequisites for the job provided in --job-name", ) parser.add_argument( "--run", action="store_true", - help="Action that executes run action for specified --job-name. run_command must be configured for a given job name.", + help="Action that executes run action for specified --job-name. run_command must be configured for a given " + "job name.", ) parser.add_argument( "--post", @@ -1075,7 +1086,7 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: "--skip-jobs", action="store_true", default=False, - help="skip fetching data about job runs, used in --configure action (for debugging and nigthly ci)", + help="skip fetching data about job runs, used in --configure action (for debugging and nightly ci)", ) parser.add_argument( "--force", @@ -1088,7 +1099,8 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: "--rebuild-all-binaries", action="store_true", default=False, - help="[DEPRECATED. to be removed, once no wf use it] will create run config without skipping build jobs in any case, used in --configure action (for release branches)", + help="[DEPRECATED. 
to be removed, once no wf use it] will create run config without skipping build jobs in " + "any case, used in --configure action (for release branches)", ) parser.add_argument( "--commit-message", @@ -1293,7 +1305,7 @@ def _configure_docker_jobs(docker_digest_or_latest: bool) -> Dict: missing_amd64 = [] missing_aarch64 = [] if not docker_digest_or_latest: - # look for missing arm and amd images only among missing multiarch manifests @missing_multi_dict + # look for missing arm and amd images only among missing multi-arch manifests @missing_multi_dict # to avoid extra dockerhub api calls missing_amd64 = list( check_missing_images_on_dockerhub(missing_multi_dict, "amd64") @@ -1391,7 +1403,7 @@ def _configure_jobs( ): continue - # fill job randomization buckets (for jobs with configured @random_bucket property)) + # fill job randomization buckets (for jobs with configured @random_bucket property) if job_config.random_bucket: if not job_config.random_bucket in randomization_buckets: randomization_buckets[job_config.random_bucket] = set() @@ -1440,8 +1452,7 @@ def _configure_jobs( jobs_params[job] = { "batches": batches_to_do, "num_batches": num_batches, - "run_if_ci_option_include_set": job_config.run_by_ci_option - and pr_info.is_pr, + "run_by_ci_option": job_config.run_by_ci_option and pr_info.is_pr, } elif add_to_skip: # treat job as being skipped only if it's controlled by digest @@ -1485,8 +1496,8 @@ def _configure_jobs( def _generate_ci_stage_config(jobs_data: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: """ populates GH Actions' workflow with real jobs - "Builds_1": [{"job_name": NAME, "runner_type": RUNER_TYPE}] - "Tests_1": [{"job_name": NAME, "runner_type": RUNER_TYPE}] + "Builds_1": [{"job_name": NAME, "runner_type": RUNNER_TYPE}] + "Tests_1": [{"job_name": NAME, "runner_type": RUNNER_TYPE}] ... 
""" result = {} # type: Dict[str, Any] @@ -1577,7 +1588,7 @@ def _fetch_commit_tokens(message: str, pr_info: PRInfo) -> List[str]: for match in matches if match in CILabels or match.startswith("job_") or match.startswith("batch_") ] - print(f"CI modifyers from commit message: [{res}]") + print(f"CI modifiers from commit message: [{res}]") res_2 = [] if pr_info.is_pr: matches = [match[-1] for match in re.findall(pattern, pr_info.body)] @@ -1588,7 +1599,7 @@ def _fetch_commit_tokens(message: str, pr_info: PRInfo) -> List[str]: or match.startswith("job_") or match.startswith("batch_") ] - print(f"CI modifyers from PR body: [{res_2}]") + print(f"CI modifiers from PR body: [{res_2}]") return list(set(res + res_2)) @@ -1654,7 +1665,7 @@ def _upload_build_artifacts( report_url = ci_cache.upload_build_report(build_result) print(f"Report file has been uploaded to [{report_url}]") - # Upload head master binaries + # Upload master head's binaries static_bin_name = CI_CONFIG.build_config[build_name].static_binary_name if pr_info.is_master and static_bin_name: # Full binary with debug info: @@ -1902,6 +1913,42 @@ def _get_ext_check_name(check_name: str) -> str: return check_name_with_group +def _cancel_pr_wf(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> None: + wf_data = CiMetadata(s3, pr_number).fetch_meta() + if not cancel_sync: + if not wf_data.run_id: + print(f"ERROR: FIX IT: Run id has not been found PR [{pr_number}]!") + else: + print( + f"Canceling PR workflow run_id: [{wf_data.run_id}], pr: [{pr_number}]" + ) + GitHub.cancel_wf(GITHUB_REPOSITORY, wf_data.run_id, get_best_robot_token()) + else: + if not wf_data.sync_pr_run_id: + print("WARNING: Sync PR run id has not been found") + else: + print(f"Canceling sync PR workflow run_id: [{wf_data.sync_pr_run_id}]") + GitHub.cancel_wf( + "ClickHouse/clickhouse-private", + wf_data.sync_pr_run_id, + get_best_robot_token(), + ) + + +def _set_pending_statuses(pr_info: PRInfo) -> None: + commit = get_commit(GitHub(get_best_robot_token(), per_page=100), pr_info.sha) + try: + print("Set SYNC status to pending") + commit.create_status( + state=PENDING, + target_url="", + description="", + context=StatusNames.SYNC, + ) + except Exception as ex: + print(f"ERROR: failed to set GH commit status, ex: {ex}") + + def main() -> int: logging.basicConfig(level=logging.INFO) exit_code = 0 @@ -1930,6 +1977,12 @@ def main() -> int: ### CONFIGURE action: start if args.configure: + if CI and pr_info.is_pr: + # store meta on s3 (now we need it only for PRs) + meta = CiMetadata(s3, pr_info.number, pr_info.head_ref) + meta.run_id = int(GITHUB_RUN_ID) + meta.push_meta() + ci_options = CiOptions.create_from_pr_message( args.commit_message or None, update_from_api=True ) @@ -2222,6 +2275,22 @@ def main() -> int: assert indata, "Run config must be provided via --infile" _update_gh_statuses_action(indata=indata, s3=s3) + ### CANCEL PREVIOUS WORKFLOW RUN + elif args.cancel_previous_run: + if pr_info.is_merge_queue: + _cancel_pr_wf(s3, pr_info.merged_pr) + elif pr_info.is_pr: + _cancel_pr_wf(s3, pr_info.number, cancel_sync=True) + else: + assert False, "BUG! Not supported scenario" + + ### SET PENDING STATUS + elif args.set_pending_status: + if pr_info.is_pr: + _set_pending_statuses(pr_info) + else: + assert False, "BUG! 
Not supported scenario" + ### print results _print_results(result, args.outfile, args.pretty) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index c3421998ca9..68fa6f1cf10 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -52,9 +52,9 @@ class CILabels(metaclass=WithIter): CI_SET_ARM = "ci_set_arm" CI_SET_INTEGRATION = "ci_set_integration" CI_SET_OLD_ANALYZER = "ci_set_old_analyzer" - CI_SET_STATLESS = "ci_set_stateless" + CI_SET_STATELESS = "ci_set_stateless" CI_SET_STATEFUL = "ci_set_stateful" - CI_SET_STATLESS_ASAN = "ci_set_stateless_asan" + CI_SET_STATELESS_ASAN = "ci_set_stateless_asan" CI_SET_STATEFUL_ASAN = "ci_set_stateful_asan" libFuzzer = "libFuzzer" @@ -206,7 +206,7 @@ class DigestConfig: include_paths: List[Union[str, Path]] = field(default_factory=list) # file suffixes to exclude from digest exclude_files: List[str] = field(default_factory=list) - # directories to exlude from digest + # directories to exclude from digest exclude_dirs: List[Union[str, Path]] = field(default_factory=list) # docker names to include into digest docker: List[str] = field(default_factory=list) @@ -217,7 +217,7 @@ class DigestConfig: @dataclass class LabelConfig: """ - configures different CI scenarious per GH label + configures different CI scenarios per GH label """ run_jobs: Iterable[str] = frozenset() @@ -231,7 +231,7 @@ class JobConfig: # configures digest calculation for the job digest: DigestConfig = field(default_factory=DigestConfig) - # will be triggered for the job if omited in CI workflow yml + # will be triggered for the job if omitted in CI workflow yml run_command: str = "" # job timeout, seconds timeout: Optional[int] = None @@ -242,7 +242,7 @@ class JobConfig: # to run always regardless of the job digest or/and label run_always: bool = False # if the job needs to be run on the release branch, including master (e.g. building packages, docker server). - # NOTE: Subsequent runs on the same branch with the similar digest are still considered skippable. + # NOTE: Subsequent runs on the same branch with the similar digest are still considered skip-able. required_on_release_branch: bool = False # job is for pr workflow only pr_only: bool = False @@ -470,7 +470,7 @@ compatibility_test_common_params = { "digest": compatibility_check_digest, "run_command": "compatibility_check.py", } -statless_test_common_params = { +stateless_test_common_params = { "digest": stateless_check_digest, "run_command": 'functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT', "timeout": 10800, @@ -665,7 +665,7 @@ class CIConfig: # crosscompile - no arm required pass else: - # switch to aarch64 runnner + # switch to aarch64 runner result += "-aarch64" return result @@ -712,7 +712,7 @@ class CIConfig: break assert ( res - ), f"Error: Experimantal feature... Invlid request or not supported job [{check_name}]" + ), f"Error: Experimental feature... 
Invalid request or not supported job [{check_name}]" return res def get_digest_config(self, check_name: str) -> DigestConfig: @@ -815,16 +815,16 @@ class CIConfig: f"The following names of the build report '{build_report_name}' " f"are missed in build_config: {missed_names}", ) - # And finally, all of tests' requirements must be in the builds + # And finally, all tests' requirements must be in the builds for test_name, test_config in self.test_configs.items(): if test_config.required_build not in self.build_config.keys(): logging.error( - "The requierment '%s' for '%s' is not found in builds", + "The requirement '%s' for '%s' is not found in builds", test_config, test_name, ) errors.append( - f"The requierment '{test_config}' for " + f"The requirement '{test_config}' for " f"'{test_name}' is not found in builds" ) @@ -865,7 +865,7 @@ CI_CONFIG = CIConfig( JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER, ] ), - CILabels.CI_SET_STATLESS: LabelConfig( + CILabels.CI_SET_STATELESS: LabelConfig( run_jobs=[ JobNames.STYLE_CHECK, JobNames.FAST_TEST, @@ -873,7 +873,7 @@ CI_CONFIG = CIConfig( JobNames.STATELESS_TEST_RELEASE, ] ), - CILabels.CI_SET_STATLESS_ASAN: LabelConfig( + CILabels.CI_SET_STATELESS_ASAN: LabelConfig( run_jobs=[ JobNames.STYLE_CHECK, JobNames.FAST_TEST, @@ -1180,49 +1180,49 @@ CI_CONFIG = CIConfig( # End stateful tests for parallel replicas JobNames.STATELESS_TEST_ASAN: TestConfig( Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=4, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=4, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_TSAN: TestConfig( Build.PACKAGE_TSAN, - job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_MSAN: TestConfig( Build.PACKAGE_MSAN, - job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_UBSAN: TestConfig( Build.PACKAGE_UBSAN, - job_config=JobConfig(num_batches=2, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=2, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_DEBUG: TestConfig( Build.PACKAGE_DEBUG, - job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_RELEASE: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**statless_test_common_params) # type: ignore + Build.PACKAGE_RELEASE, job_config=JobConfig(**stateless_test_common_params) # type: ignore ), JobNames.STATELESS_TEST_RELEASE_COVERAGE: TestConfig( Build.PACKAGE_RELEASE_COVERAGE, - job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_AARCH64: TestConfig( - Build.PACKAGE_AARCH64, job_config=JobConfig(**statless_test_common_params) # type: ignore + Build.PACKAGE_AARCH64, job_config=JobConfig(**stateless_test_common_params) # type: ignore ), JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: TestConfig( Build.PACKAGE_RELEASE, - job_config=JobConfig(num_batches=4, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=4, **stateless_test_common_params), # type: ignore ), 
JobNames.STATELESS_TEST_S3_DEBUG: TestConfig( Build.PACKAGE_DEBUG, - job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_AZURE_ASAN: TestConfig( Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=4, **statless_test_common_params, release_only=True, run_by_ci_option=True), # type: ignore + job_config=JobConfig(num_batches=4, **stateless_test_common_params, release_only=True, run_by_ci_option=True), # type: ignore ), JobNames.STATELESS_TEST_S3_TSAN: TestConfig( Build.PACKAGE_TSAN, - job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore ), JobNames.STRESS_TEST_DEBUG: TestConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(**stress_test_common_params) # type: ignore @@ -1271,8 +1271,7 @@ CI_CONFIG = CIConfig( ), JobNames.INTEGRATION_TEST_ARM: TestConfig( Build.PACKAGE_AARCH64, - # add [run_by_label="test arm"] to not run in regular pr workflow by default - job_config=JobConfig(num_batches=6, **integration_test_common_params, run_by_label="test arm"), # type: ignore + job_config=JobConfig(num_batches=6, **integration_test_common_params), # type: ignore ), JobNames.INTEGRATION_TEST: TestConfig( Build.PACKAGE_RELEASE, @@ -1326,7 +1325,7 @@ CI_CONFIG = CIConfig( JobNames.STATELESS_TEST_FLAKY_ASAN: TestConfig( # replace to non-default Build.PACKAGE_ASAN, - job_config=JobConfig(pr_only=True, **{**statless_test_common_params, "timeout": 3600}), # type: ignore + job_config=JobConfig(pr_only=True, **{**stateless_test_common_params, "timeout": 3600}), # type: ignore ), JobNames.JEPSEN_KEEPER: TestConfig( Build.BINARY_RELEASE, @@ -1486,7 +1485,7 @@ CHECK_DESCRIPTIONS = [ "Checks if new added or modified tests are flaky by running them repeatedly, " "in parallel, with more randomization. Functional tests are run 100 times " "with address sanitizer, and additional randomization of thread scheduling. " - "Integrational tests are run up to 10 times. If at least once a new test has " + "Integration tests are run up to 10 times. If at least once a new test has " "failed, or was too long, this check will be red. We don't allow flaky tests, " 'read the doc', @@ -1576,7 +1575,7 @@ CHECK_DESCRIPTIONS = [ lambda x: x.startswith("ClickBench"), ), CheckDescription( - "Falback for unknown", + "Fallback for unknown", "There's no description for the check yet, please add it to " "tests/ci/ci_config.py:CHECK_DESCRIPTIONS", lambda x: True, diff --git a/tests/ci/ci_metadata.py b/tests/ci/ci_metadata.py new file mode 100644 index 00000000000..a767d102811 --- /dev/null +++ b/tests/ci/ci_metadata.py @@ -0,0 +1,151 @@ +from pathlib import Path +from typing import Optional + +from env_helper import ( + S3_BUILDS_BUCKET, + TEMP_PATH, + GITHUB_UPSTREAM_REPOSITORY, + GITHUB_REPOSITORY, + S3_BUILDS_BUCKET_PUBLIC, +) +from s3_helper import S3Helper +from ci_utils import GHActions +from synchronizer_utils import SYNC_BRANCH_PREFIX + + +# pylint: disable=too-many-lines + + +class CiMetadata: + """ + CI Metadata class owns data like workflow run_id for a given pr, etc. 
+ Goal is to have everything we need to manage workflows on S3 and rely on GH api as little as possible + """ + + _S3_PREFIX = "CI_meta_v1" + _LOCAL_PATH = Path(TEMP_PATH) / "ci_meta" + _FILE_SUFFIX = ".cimd" + _FILENAME_RUN_ID = "run_id" + _FILE_SUFFIX + _FILENAME_SYNC_PR_RUN_ID = "sync_pr_run_id" + _FILE_SUFFIX + + def __init__( + self, + s3: S3Helper, + pr_number: Optional[int] = None, + git_ref: Optional[str] = None, + sha: Optional[str] = None, + ): + assert pr_number or (sha and git_ref) + + self.sha = sha + self.pr_number = pr_number + self.git_ref = git_ref + self.s3 = s3 + self.run_id = 0 + self.upstream_pr_number = 0 + self.sync_pr_run_id = 0 + + if self.pr_number: + self.s3_path = f"{self._S3_PREFIX}/PRs/{self.pr_number}/" + else: + self.s3_path = f"{self._S3_PREFIX}/{self.git_ref}/{self.sha}/" + + # Process upstream StatusNames.SYNC: + # metadata path for upstream pr + self.s3_path_upstream = "" + if ( + self.git_ref + and self.git_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") + and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY + ): + self.upstream_pr_number = int(self.git_ref.split("/pr/", maxsplit=1)[1]) + self.s3_path_upstream = f"{self._S3_PREFIX}/PRs/{self.upstream_pr_number}/" + + self._updated = False + + if not self._LOCAL_PATH.exists(): + self._LOCAL_PATH.mkdir(parents=True, exist_ok=True) + + def fetch_meta(self): + """ + Fetches meta from s3 + """ + + # clean up + for file in self._LOCAL_PATH.glob("*" + self._FILE_SUFFIX): + file.unlink() + + _ = self.s3.download_files( + bucket=S3_BUILDS_BUCKET, + s3_path=self.s3_path, + file_suffix=self._FILE_SUFFIX, + local_directory=self._LOCAL_PATH, + ) + + meta_files = Path(self._LOCAL_PATH).rglob("*" + self._FILE_SUFFIX) + for file_name in meta_files: + path_in_str = str(file_name) + with open(path_in_str, "r", encoding="utf-8") as f: + # Read all lines in the file + lines = f.readlines() + assert len(lines) == 1 + if file_name.name == self._FILENAME_RUN_ID: + self.run_id = int(lines[0]) + elif file_name.name == self._FILENAME_SYNC_PR_RUN_ID: + self.sync_pr_run_id = int(lines[0]) + + self._updated = True + return self + + def push_meta( + self, + ) -> None: + """ + Uploads meta on s3 + """ + assert self.run_id + assert self.git_ref, "Push meta only with full info" + + if not self.upstream_pr_number: + log_title = f"Storing workflow metadata: PR [{self.pr_number}]" + else: + log_title = f"Storing workflow metadata: PR [{self.pr_number}], upstream PR [{self.upstream_pr_number}]" + + GHActions.print_in_group( + log_title, + [f"run_id: {self.run_id}"], + ) + + local_file = self._LOCAL_PATH / self._FILENAME_RUN_ID + with open(local_file, "w", encoding="utf-8") as file: + file.write(f"{self.run_id}\n") + + _ = self.s3.upload_file( + bucket=S3_BUILDS_BUCKET, + file_path=local_file, + s3_path=self.s3_path + self._FILENAME_RUN_ID, + ) + + if self.upstream_pr_number: + # store run id in upstream pr meta as well + _ = self.s3.upload_file( + bucket=S3_BUILDS_BUCKET_PUBLIC, + file_path=local_file, + s3_path=self.s3_path_upstream + self._FILENAME_SYNC_PR_RUN_ID, + ) + + +if __name__ == "__main__": + # TEST: + s3 = S3Helper() + a = CiMetadata(s3, 12345, "deadbeaf", "test_branch") + a.run_id = 111 + a.push_meta() + b = CiMetadata(s3, 12345, "deadbeaf", "test_branch") + assert b.fetch_meta().run_id == a.run_id + + a = CiMetadata(s3, 0, "deadbeaf", "test_branch") + a.run_id = 112 + a.push_meta() + b = CiMetadata(s3, 0, "deadbeaf", "test_branch") + assert b.fetch_meta().run_id == a.run_id diff --git a/tests/ci/commit_status_helper.py 
b/tests/ci/commit_status_helper.py index e1c47353743..b17c189c405 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -20,7 +20,6 @@ from github.Repository import Repository from ci_config import CHECK_DESCRIPTIONS, CheckDescription, StatusNames, is_required from env_helper import ( GITHUB_REPOSITORY, - GITHUB_RUN_URL, GITHUB_UPSTREAM_REPOSITORY, TEMP_PATH, ) @@ -433,11 +432,8 @@ def set_mergeable_check( commit: Commit, description: str = "", state: StatusType = SUCCESS, - hide_url: bool = False, ) -> CommitStatus: - report_url = GITHUB_RUN_URL - if hide_url: - report_url = "" + report_url = "" return post_commit_status( commit, state, @@ -469,7 +465,6 @@ def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> def trigger_mergeable_check( commit: Commit, statuses: CommitStatuses, - hide_url: bool = False, set_if_green: bool = False, workflow_failed: bool = False, ) -> StatusType: @@ -484,25 +479,30 @@ def trigger_mergeable_check( success = [] fail = [] + pending = [] for status in required_checks: if status.state == SUCCESS: success.append(status.context) + elif status.state == PENDING: + pending.append(status.context) else: fail.append(status.context) state: StatusType = SUCCESS - if success: - description = ", ".join(success) - else: - description = "awaiting job statuses" - if fail: description = "failed: " + ", ".join(fail) state = FAILURE elif workflow_failed: description = "check workflow failures" state = FAILURE + elif pending: + description = "pending: " + ", ".join(pending) + state = PENDING + else: + # all good + description = ", ".join(success) + description = format_description(description) if not set_if_green and state == SUCCESS: @@ -510,7 +510,7 @@ def trigger_mergeable_check( pass else: if mergeable_status is None or mergeable_status.description != description: - set_mergeable_check(commit, description, state, hide_url) + set_mergeable_check(commit, description, state) return state @@ -556,13 +556,12 @@ def update_upstream_sync_status( post_commit_status( last_synced_upstream_commit, sync_status, - "", # let's won't expose any urls from cloud + "", "", StatusNames.SYNC, ) trigger_mergeable_check( last_synced_upstream_commit, get_commit_filtered_statuses(last_synced_upstream_commit), - True, set_if_green=can_set_green_mergeable_status, ) diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index 9b9652d5bd3..64614ffa611 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -31,6 +31,7 @@ IMAGES_PATH = os.getenv("IMAGES_PATH", TEMP_PATH) REPO_COPY = os.getenv("REPO_COPY", GITHUB_WORKSPACE) RUNNER_TEMP = os.getenv("RUNNER_TEMP", p.abspath(p.join(module_dir, "./tmp"))) S3_BUILDS_BUCKET = os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds") +S3_BUILDS_BUCKET_PUBLIC = "clickhouse-builds" S3_TEST_REPORTS_BUCKET = os.getenv("S3_TEST_REPORTS_BUCKET", "clickhouse-test-reports") S3_URL = os.getenv("S3_URL", "https://s3.amazonaws.com") S3_DOWNLOAD = os.getenv("S3_DOWNLOAD", S3_URL) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 1a7000f5353..269d5aa3175 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -15,7 +15,7 @@ from commit_status_helper import ( ) from get_robot_token import get_best_robot_token from pr_info import PRInfo -from report import PENDING, SUCCESS +from report import PENDING from synchronizer_utils import SYNC_BRANCH_PREFIX from env_helper import GITHUB_REPOSITORY, GITHUB_UPSTREAM_REPOSITORY @@ -67,7 +67,7 @@ def main(): if status.state == 
PENDING: post_commit_status( commit, - SUCCESS, + state, # map Mergeable Check status to CI Running status.target_url, "All checks finished", StatusNames.CI, diff --git a/tests/ci/github_helper.py b/tests/ci/github_helper.py index ae1eaf4c06a..eb0f6c24527 100644 --- a/tests/ci/github_helper.py +++ b/tests/ci/github_helper.py @@ -9,6 +9,7 @@ from time import sleep from typing import List, Optional, Tuple, Union import github +import requests # explicit reimport # pylint: disable=useless-import-alias @@ -260,3 +261,17 @@ class GitHub(github.Github): def retries(self, value: int) -> None: assert isinstance(value, int) self._retries = value + + # static methods not using pygithub + @staticmethod + def cancel_wf(repo, run_id, token, strict=False): + headers = {"Authorization": f"token {token}"} + url = f"https://api.github.com/repos/{repo}/actions/runs/{run_id}/cancel" + try: + response = requests.post(url, headers=headers, timeout=10) + response.raise_for_status() + print(f"NOTE: Workflow [{run_id}] has been cancelled") + except Exception as ex: + print("ERROR: Got exception executing wf cancel request", ex) + if strict: + raise ex diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 500de4eb718..e1c7bf94ff5 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -250,7 +250,6 @@ def main(): trigger_mergeable_check( commit, statuses, - hide_url=False, set_if_green=True, workflow_failed=(args.wf_status != "success"), ) diff --git a/tests/ci/report.py b/tests/ci/report.py index 8676c998afb..670a10f4561 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -401,30 +401,40 @@ class BuildResult: @classmethod def load_any(cls, build_name: str, pr_number: int, head_ref: str): # type: ignore """ - loads report from suitable report file with the following priority: - 1. report from PR with the same @pr_number - 2. report from branch with the same @head_ref - 3. report from the master - 4. any other report + loads build report from one of all available report files (matching the job digest) + with the following priority: + 1. report for the current PR @pr_number (might happen in PR' wf with or without job reuse) + 2. report for the current branch @head_ref (might happen in release/master' wf with or without job reuse) + 3. report for master branch (might happen in any workflow in case of job reuse) + 4. 
any other report (job reuse from another PR, if master report is not available yet) """ - reports = [] + pr_report = None + ref_report = None + master_report = None + any_report = None for file in Path(REPORT_PATH).iterdir(): if f"{build_name}.json" in file.name: - reports.append(file) - if not reports: - return None - file_path = None - for file in reports: - if pr_number and f"_{pr_number}_" in file.name: - file_path = file - break - if f"_{head_ref}_" in file.name: - file_path = file - break + any_report = file if "_master_" in file.name: - file_path = file - break - return cls.load_from_file(file_path or reports[-1]) + master_report = file + elif f"_{head_ref}_" in file.name: + ref_report = file + elif pr_number and f"_{pr_number}_" in file.name: + pr_report = file + + if not any_report: + return None + + if pr_report: + file_path = pr_report + elif ref_report: + file_path = ref_report + elif master_report: + file_path = master_report + else: + file_path = any_report + + return cls.load_from_file(file_path) @classmethod def load_from_file(cls, file: Union[Path, str]): # type: ignore diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index 0f10f7d4f85..c07c094d439 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -161,7 +161,7 @@ class TestCIOptions(unittest.TestCase): "Stateless tests (azure, asan)": { "batches": list(range(3)), "num_batches": 3, - "run_if_ci_option_include_set": True, + "run_by_ci_option": True, } } jobs_to_do, jobs_to_skip, job_params = ci_options.apply( @@ -226,10 +226,10 @@ class TestCIOptions(unittest.TestCase): job_params[job] = { "batches": list(range(3)), "num_batches": 3, - "run_if_ci_option_include_set": "azure" in job, + "run_by_ci_option": "azure" in job, } else: - job_params[job] = {"run_if_ci_option_include_set": False} + job_params[job] = {"run_by_ci_option": False} jobs_to_do, jobs_to_skip, job_params = ci_options.apply( jobs_to_do, jobs_to_skip, job_params, PRInfo() diff --git a/tests/config/config.d/max_num_to_warn.xml b/tests/config/config.d/max_num_to_warn.xml index 776c270823d..1f55e6fd674 100644 --- a/tests/config/config.d/max_num_to_warn.xml +++ b/tests/config/config.d/max_num_to_warn.xml @@ -1,5 +1,7 @@ 5 + 5 + 5 2 10 diff --git a/tests/fuzz/dictionaries/datatypes.dict b/tests/fuzz/dictionaries/datatypes.dict index 232e89db0c0..a01a94fd3e3 100644 --- a/tests/fuzz/dictionaries/datatypes.dict +++ b/tests/fuzz/dictionaries/datatypes.dict @@ -132,3 +132,4 @@ "YEAR" "bool" "boolean" +"Dynamic" diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index c2bea3060aa..41c162217d2 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -513,6 +513,7 @@ class ClickHouseCluster: self.minio_redirect_host = "proxy1" self.minio_redirect_ip = None self.minio_redirect_port = 8080 + self.minio_docker_id = self.get_instance_docker_id(self.minio_host) self.spark_session = None diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 206f960293f..7d0127bc1c4 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py @@ -165,11 +165,35 @@ class _ServerRuntime: '' "" "ExpectedError" - "mock s3 injected error" + "mock s3 injected unretryable error" "txfbd566d03042474888193-00608d7537" "" ) - request_handler.write_error(data) + request_handler.write_error(500, data) + + class SlowDownAction: + def inject_error(self, request_handler): 
+ data = ( + '' + "" + "SlowDown" + "Slow Down." + "txfbd566d03042474888193-00608d7537" + "" + ) + request_handler.write_error(429, data) + + class QpsLimitExceededAction: + def inject_error(self, request_handler): + data = ( + '' + "" + "QpsLimitExceeded" + "Please reduce your request rate." + "txfbd566d03042474888193-00608d7537" + "" + ) + request_handler.write_error(429, data) class RedirectAction: def __init__(self, host="localhost", port=1): @@ -239,6 +263,12 @@ class _ServerRuntime: self.error_handler = _ServerRuntime.BrokenPipeAction() elif self.action == "redirect_to": self.error_handler = _ServerRuntime.RedirectAction(*self.action_args) + elif self.action == "slow_down": + self.error_handler = _ServerRuntime.SlowDownAction(*self.action_args) + elif self.action == "qps_limit_exceeded": + self.error_handler = _ServerRuntime.QpsLimitExceededAction( + *self.action_args + ) else: self.error_handler = _ServerRuntime.Expected500ErrorAction() @@ -344,12 +374,12 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.end_headers() self.wfile.write(b"Redirected") - def write_error(self, data, content_length=None): + def write_error(self, http_code, data, content_length=None): if content_length is None: content_length = len(data) self.log_message("write_error %s", data) self.read_all_input() - self.send_response(500) + self.send_response(http_code) self.send_header("Content-Type", "text/xml") self.send_header("Content-Length", str(content_length)) self.end_headers() @@ -418,7 +448,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): path = [x for x in parts.path.split("/") if x] assert path[0] == "mock_settings", path if len(path) < 2: - return self.write_error("_mock_settings: wrong command") + return self.write_error(400, "_mock_settings: wrong command") if path[1] == "at_part_upload": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) @@ -477,7 +507,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.log_message("reset") return self._ok() - return self.write_error("_mock_settings: wrong command") + return self.write_error(400, "_mock_settings: wrong command") def do_GET(self): if self.path == "/": diff --git a/tests/integration/test_backup_restore_s3/configs/disk_s3_restricted_user.xml b/tests/integration/test_backup_restore_s3/configs/disk_s3_restricted_user.xml new file mode 100644 index 00000000000..323e986f966 --- /dev/null +++ b/tests/integration/test_backup_restore_s3/configs/disk_s3_restricted_user.xml @@ -0,0 +1,22 @@ + + + + + + s3 + http://minio1:9001/root/data/disks/disk_s3_restricted_user/ + miniorestricted1 + minio123 + + + + + +
+                        <disk>disk_s3_restricted_user</disk>
+                    </main>
+                </volumes>
+            </policy_s3_restricted>
+        </policies>
+    </storage_configuration>
diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 05424887736..967ed6a221c 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -3,8 +3,11 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV import uuid +import os +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") + cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", @@ -20,13 +23,127 @@ node = cluster.add_instance( ], with_minio=True, with_zookeeper=True, + stay_alive=True, ) +def setup_minio_users(): + # create 2 extra users with restricted access + # miniorestricted1 - full access to bucket 'root', no access to other buckets + # miniorestricted2 - full access to bucket 'root2', no access to other buckets + # storage policy 'policy_s3_restricted' defines a policy for storing files inside bucket 'root' using 'miniorestricted1' user + for user, bucket in [("miniorestricted1", "root"), ("miniorestricted2", "root2")]: + print( + cluster.exec_in_container( + cluster.minio_docker_id, + [ + "mc", + "alias", + "set", + "root", + "http://minio1:9001", + "minio", + "minio123", + ], + ) + ) + policy = f""" +{{ + "Version": "2012-10-17", + "Statement": [ + {{ + "Effect": "Allow", + "Principal": {{ + "AWS": [ + "*" + ] + }}, + "Action": [ + "s3:GetBucketLocation", + "s3:ListBucket", + "s3:ListBucketMultipartUploads" + ], + "Resource": [ + "arn:aws:s3:::{bucket}" + ] + }}, + {{ + "Effect": "Allow", + "Principal": {{ + "AWS": [ + "*" + ] + }}, + "Action": [ + "s3:AbortMultipartUpload", + "s3:DeleteObject", + "s3:GetObject", + "s3:ListMultipartUploadParts", + "s3:PutObject" + ], + "Resource": [ + "arn:aws:s3:::{bucket}/*" + ] + }} + ] +}}""" + + cluster.exec_in_container( + cluster.minio_docker_id, + ["bash", "-c", f"cat >/tmp/{bucket}_policy.json < 1000000 1 + 0 diff --git a/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml b/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml index 95a313ea4f2..c1ca258f6c4 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml +++ b/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml @@ -5,6 +5,7 @@ 5 0 + 0 diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 22d6d263d23..a7fe02b16de 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -91,7 +91,7 @@ def get_multipart_counters(node, query_id, log_type="ExceptionWhileProcessing"): SELECT ProfileEvents['S3CreateMultipartUpload'], ProfileEvents['S3UploadPart'], - ProfileEvents['S3WriteRequestsErrors'], + ProfileEvents['S3WriteRequestsErrors'] + ProfileEvents['S3WriteRequestsThrottling'], FROM system.query_log WHERE query_id='{query_id}' AND type='{log_type}' @@ -148,7 +148,7 @@ def test_upload_s3_fail_create_multi_part_upload(cluster, broken_s3, compression ) assert "Code: 499" in error, error - assert "mock s3 injected error" in error, error + assert "mock s3 injected unretryable error" in error, error create_multipart, upload_parts, s3_errors = get_multipart_counters( node, insert_query_id @@ -190,7 +190,7 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( ) assert "Code: 499" in error, error - assert "mock s3 injected error" in error, error + assert "mock s3 
injected unretryable error" in error, error create_multipart, upload_parts, s3_errors = get_multipart_counters( node, insert_query_id @@ -200,18 +200,32 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( assert s3_errors >= 2 -def test_when_s3_connection_refused_is_retried(cluster, broken_s3): +@pytest.mark.parametrize( + "action_and_message", + [ + ("slow_down", "DB::Exception: Slow Down."), + ("qps_limit_exceeded", "DB::Exception: Please reduce your request rate."), + ( + "connection_refused", + "Poco::Exception. Code: 1000, e.code() = 111, Connection refused", + ), + ], + ids=lambda x: x[0], +) +def test_when_error_is_retried(cluster, broken_s3, action_and_message): node = cluster.instances["node"] - broken_s3.setup_fake_multpartuploads() - broken_s3.setup_at_part_upload(count=3, after=2, action="connection_refused") + action, message = action_and_message - insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_CONNECTION_REFUSED_RETRIED" + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload(count=3, after=2, action=action) + + insert_query_id = f"INSERT_INTO_TABLE_{action}_RETRIED" node.query( f""" INSERT INTO TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_connection_refused_at_write_retried', + 'http://resolver:8083/root/data/test_when_{action}_retried', 'minio', 'minio123', 'CSV', auto, 'none' ) @@ -234,13 +248,13 @@ def test_when_s3_connection_refused_is_retried(cluster, broken_s3): assert upload_parts == 39 assert s3_errors == 3 - broken_s3.setup_at_part_upload(count=1000, after=2, action="connection_refused") - insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_CONNECTION_REFUSED_RETRIED_1" + broken_s3.setup_at_part_upload(count=1000, after=2, action=action) + insert_query_id = f"INSERT_INTO_TABLE_{action}_RETRIED_1" error = node.query_and_get_error( f""" INSERT INTO TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_connection_refused_at_write_retried', + 'http://resolver:8083/root/data/test_when_{action}_retried', 'minio', 'minio123', 'CSV', auto, 'none' ) @@ -257,8 +271,78 @@ def test_when_s3_connection_refused_is_retried(cluster, broken_s3): ) assert "Code: 499" in error, error + assert message in error, error + + +def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): + node = cluster.instances["node"] + + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload( + count=3, + after=2, + action="broken_pipe", + ) + + insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD" + node.query( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=1000000, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + create_multipart, upload_parts, s3_errors = get_multipart_counters( + node, insert_query_id, log_type="QueryFinish" + ) + + assert create_multipart == 1 + assert upload_parts == 7 + assert s3_errors == 3 + + broken_s3.setup_at_part_upload( + count=1000, + after=2, + action="broken_pipe", + ) + insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD_1" + error = node.query_and_get_error( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000000 + SETTINGS + 
s3_max_single_part_upload_size=100, + s3_min_upload_part_size=1000000, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + assert "Code: 1000" in error, error assert ( - "Poco::Exception. Code: 1000, e.code() = 111, Connection refused" in error + "DB::Exception: Poco::Exception. Code: 1000, e.code() = 32, I/O error: Broken pipe" + in error ), error @@ -401,20 +485,20 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( ) error = node.query_and_get_error( f""" - INSERT INTO - TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_create_mpu_retried', - 'minio', 'minio123', - 'CSV', auto, 'none' - ) - SELECT - * - FROM system.numbers - LIMIT 1000 - SETTINGS - s3_max_single_part_upload_size=100, - s3_min_upload_part_size=100, - s3_check_objects_after_upload=0 + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_create_mpu_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 """, query_id=insert_query_id, ) @@ -427,78 +511,6 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( ), error -def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): - node = cluster.instances["node"] - - broken_s3.setup_fake_multpartuploads() - broken_s3.setup_at_part_upload( - count=3, - after=2, - action="broken_pipe", - ) - - insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD" - node.query( - f""" - INSERT INTO - TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', - 'minio', 'minio123', - 'CSV', auto, 'none' - ) - SELECT - * - FROM system.numbers - LIMIT 1000000 - SETTINGS - s3_max_single_part_upload_size=100, - s3_min_upload_part_size=1000000, - s3_check_objects_after_upload=0 - """, - query_id=insert_query_id, - ) - - create_multipart, upload_parts, s3_errors = get_multipart_counters( - node, insert_query_id, log_type="QueryFinish" - ) - - assert create_multipart == 1 - assert upload_parts == 7 - assert s3_errors == 3 - - broken_s3.setup_at_part_upload( - count=1000, - after=2, - action="broken_pipe", - ) - insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD_1" - error = node.query_and_get_error( - f""" - INSERT INTO - TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', - 'minio', 'minio123', - 'CSV', auto, 'none' - ) - SELECT - * - FROM system.numbers - LIMIT 1000000 - SETTINGS - s3_max_single_part_upload_size=100, - s3_min_upload_part_size=1000000, - s3_check_objects_after_upload=0 - """, - query_id=insert_query_id, - ) - - assert "Code: 1000" in error, error - assert ( - "DB::Exception: Poco::Exception. 
Code: 1000, e.code() = 32, I/O error: Broken pipe" - in error - ), error - - def test_query_is_canceled_with_inf_retries(cluster, broken_s3): node = cluster.instances["node_with_inf_s3_retries"] diff --git a/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml b/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml index 80409d3e18b..32d5d131a44 100644 --- a/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml +++ b/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml @@ -1,4 +1,4 @@ 10 - false + throw diff --git a/tests/integration/test_group_array_element_size/test.py b/tests/integration/test_group_array_element_size/test.py index 1eb7647d734..90b2712ffbf 100644 --- a/tests/integration/test_group_array_element_size/test.py +++ b/tests/integration/test_group_array_element_size/test.py @@ -80,8 +80,8 @@ def test_limit_size(started_cluster): node2.replace_in_config( "/etc/clickhouse-server/config.d/group_array_max_element_size.xml", - "false", - "true", + "throw", + "discard", ) node2.restart_clickhouse() @@ -91,8 +91,8 @@ def test_limit_size(started_cluster): node2.replace_in_config( "/etc/clickhouse-server/config.d/group_array_max_element_size.xml", - "true", - "false", + "discard", + "throw", ) node2.restart_clickhouse() diff --git a/tests/integration/test_lazy_database/__init__.py b/tests/integration/test_lazy_database/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_lazy_database/configs/storage_policy.xml b/tests/integration/test_lazy_database/configs/storage_policy.xml new file mode 100644 index 00000000000..58771d6b284 --- /dev/null +++ b/tests/integration/test_lazy_database/configs/storage_policy.xml @@ -0,0 +1,12 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + + diff --git a/tests/integration/test_lazy_database/test.py b/tests/integration/test_lazy_database/test.py new file mode 100644 index 00000000000..6890aa87374 --- /dev/null +++ b/tests/integration/test_lazy_database/test.py @@ -0,0 +1,88 @@ +import logging +import time +import pytest +import os +from helpers.cluster import ClickHouseCluster + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=["configs/storage_policy.xml"], + with_minio=True, + ) + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def assert_objects_count(cluster, objects_count, path="data/"): + minio = cluster.minio_client + s3_objects = list(minio.list_objects(cluster.minio_bucket, path, recursive=True)) + if objects_count != len(s3_objects): + for s3_object in s3_objects: + object_meta = minio.stat_object(cluster.minio_bucket, s3_object.object_name) + logging.info("Existing S3 object: %s", str(object_meta)) + assert objects_count == len(s3_objects) + + +def list_of_files_on_ch_disk(node, disk, path): + disk_path = node.query( + f"SELECT path FROM system.disks WHERE name='{disk}'" + ).splitlines()[0] + return node.exec_in_container( + ["bash", "-c", f"ls {os.path.join(disk_path, path)}"], user="root" + ) + + +@pytest.mark.parametrize( + "engine", + [ + pytest.param("Log"), + ], +) +@pytest.mark.parametrize( + "disk,check_s3", + [ + pytest.param("default", False), + pytest.param("s3", True), + ], +) +@pytest.mark.parametrize( + "delay", + [ + 
pytest.param(0), + pytest.param(4), + ], +) +def test_drop_table(cluster, engine, disk, check_s3, delay): + node = cluster.instances["node"] + + node.query("DROP DATABASE IF EXISTS lazy") + node.query("CREATE DATABASE lazy ENGINE=Lazy(2)") + node.query( + "CREATE TABLE lazy.table (id UInt64) ENGINE={} SETTINGS disk = '{}'".format( + engine, + disk, + ) + ) + + node.query("INSERT INTO lazy.table SELECT number FROM numbers(10)") + assert node.query("SELECT count(*) FROM lazy.table") == "10\n" + if delay: + time.sleep(delay) + node.query("DROP TABLE lazy.table SYNC") + + if check_s3: + # There mustn't be any orphaned data + assert_objects_count(cluster, 0) + + # Local data must be removed + assert list_of_files_on_ch_disk(node, disk, "data/lazy/") == "" diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 9216b08f942..0bf81e81383 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -857,9 +857,9 @@ def test_merge_canceled_by_s3_errors(cluster, broken_s3, node_name, storage_poli error = node.query_and_get_error( "OPTIMIZE TABLE test_merge_canceled_by_s3_errors FINAL", ) - assert "ExpectedError Message: mock s3 injected error" in error, error + assert "ExpectedError Message: mock s3 injected unretryable error" in error, error - node.wait_for_log_line("ExpectedError Message: mock s3 injected error") + node.wait_for_log_line("ExpectedError Message: mock s3 injected unretryable error") table_uuid = node.query( "SELECT uuid FROM system.tables WHERE database = 'default' AND name = 'test_merge_canceled_by_s3_errors' LIMIT 1" @@ -867,7 +867,7 @@ def test_merge_canceled_by_s3_errors(cluster, broken_s3, node_name, storage_poli node.query("SYSTEM FLUSH LOGS") error_count_in_blob_log = node.query( - f"SELECT count() FROM system.blob_storage_log WHERE query_id like '{table_uuid}::%' AND error like '%mock s3 injected error%'" + f"SELECT count() FROM system.blob_storage_log WHERE query_id like '{table_uuid}::%' AND error like '%mock s3 injected unretryable error%'" ).strip() assert int(error_count_in_blob_log) > 0, node.query( f"SELECT * FROM system.blob_storage_log WHERE query_id like '{table_uuid}::%' FORMAT PrettyCompactMonoBlock" @@ -911,7 +911,7 @@ def test_merge_canceled_by_s3_errors_when_move(cluster, broken_s3, node_name): node.query("OPTIMIZE TABLE merge_canceled_by_s3_errors_when_move FINAL") - node.wait_for_log_line("ExpectedError Message: mock s3 injected error") + node.wait_for_log_line("ExpectedError Message: mock s3 injected unretryable error") count = node.query("SELECT count() FROM merge_canceled_by_s3_errors_when_move") assert int(count) == 2000, count diff --git a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml index c8bbb7f3530..d3a9d4fb8f0 100644 --- a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml +++ b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml @@ -19,4 +19,4 @@ 01
- + \ No newline at end of file diff --git a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_zk_path.xml b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_unusual.xml similarity index 80% rename from tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_zk_path.xml rename to tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_unusual.xml index ba13cd87031..812291335b8 100644 --- a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_zk_path.xml +++ b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_unusual.xml @@ -15,6 +15,6 @@ 01
-/clickhouse/'/{database}/{table}/{uuid} +/lol/kek/'/{uuid} diff --git a/tests/integration/test_modify_engine_on_restart/test_unusual_path.py b/tests/integration/test_modify_engine_on_restart/test_unusual_path.py index 20d2c29257b..e82f48e8b34 100644 --- a/tests/integration/test_modify_engine_on_restart/test_unusual_path.py +++ b/tests/integration/test_modify_engine_on_restart/test_unusual_path.py @@ -6,7 +6,7 @@ cluster = ClickHouseCluster(__file__) ch1 = cluster.add_instance( "ch1", main_configs=[ - "configs/config.d/clusters_zk_path.xml", + "configs/config.d/clusters_unusual.xml", "configs/config.d/distributed_ddl.xml", ], with_zookeeper=True, @@ -63,7 +63,7 @@ def check_tables(): ) .strip() .startswith( - "ReplicatedReplacingMergeTree(\\'/clickhouse/\\\\\\'/{database}/{table}/{uuid}\\', \\'{replica}\\', D)" + "ReplicatedReplacingMergeTree(\\'/lol/kek/\\\\\\'/{uuid}\\', \\'{replica}\\', D)" ) ) assert ( @@ -73,7 +73,7 @@ def check_tables(): ) .strip() .startswith( - "ReplicatedVersionedCollapsingMergeTree(\\'/clickhouse/\\\\\\'/{database}/{table}/{uuid}\\', \\'{replica}\\', Sign, Version)" + "ReplicatedVersionedCollapsingMergeTree(\\'/lol/kek/\\\\\\'/{uuid}\\', \\'{replica}\\', Sign, Version)" ) ) diff --git a/tests/integration/test_modify_engine_on_restart/test_zk_path.py b/tests/integration/test_modify_engine_on_restart/test_zk_path.py deleted file mode 100644 index dd633ad0810..00000000000 --- a/tests/integration/test_modify_engine_on_restart/test_zk_path.py +++ /dev/null @@ -1,69 +0,0 @@ -import pytest -from test_modify_engine_on_restart.common import ( - get_table_path, - set_convert_flags, -) -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -ch1 = cluster.add_instance( - "ch1", - main_configs=[ - "configs/config.d/clusters_zk_path.xml", - "configs/config.d/distributed_ddl.xml", - ], - with_zookeeper=True, - macros={"replica": "node1"}, - stay_alive=True, -) - -database_name = "modify_engine_zk_path" - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -def q(node, query): - return node.query(database=database_name, sql=query) - - -def test_modify_engine_fails_if_zk_path_exists(started_cluster): - ch1.query("CREATE DATABASE " + database_name) - - q( - ch1, - "CREATE TABLE already_exists_1 ( A Int64, D Date, S String ) ENGINE MergeTree() PARTITION BY toYYYYMM(D) ORDER BY A;", - ) - uuid = q( - ch1, - f"SELECT uuid FROM system.tables WHERE table = 'already_exists_1' and database = '{database_name}'", - ).strip("'[]\n") - - q( - ch1, - f"CREATE TABLE already_exists_2 ( A Int64, D Date, S String ) ENGINE ReplicatedMergeTree('/clickhouse/\\'/{database_name}/already_exists_1/{uuid}', 'r2') PARTITION BY toYYYYMM(D) ORDER BY A;", - ) - - set_convert_flags(ch1, database_name, ["already_exists_1"]) - - table_data_path = get_table_path(ch1, "already_exists_1", database_name) - - ch1.stop_clickhouse() - ch1.start_clickhouse(retry_start=False, expected_to_fail=True) - - # Check if we can cancel convertation - ch1.exec_in_container( - [ - "bash", - "-c", - f"rm {table_data_path}convert_to_replicated", - ] - ) - ch1.start_clickhouse() diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 78aaf26a2a7..f836c58ce30 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -30,6 +30,8 @@ def cluster(): 
with_azurite=True, ) cluster.start() + container_client = cluster.blob_service_client.get_container_client("cont") + container_client.create_container() yield cluster finally: cluster.shutdown() @@ -130,8 +132,10 @@ def test_create_table_connection_string(cluster): node = cluster.instances["node"] azure_query( node, - f"CREATE TABLE test_create_table_conn_string (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}'," - f"'cont', 'test_create_connection_string', 'CSV')", + f""" + CREATE TABLE test_create_table_conn_string (key UInt64, data String) + Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_create_connection_string', 'CSV') + """, ) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index bb72574c6e5..44c0223e677 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -61,7 +61,12 @@ def test_read_write_storage_with_globs(started_cluster): hdfs_api.write_data("/storage" + i, i + "\tMark\t72.53\n") assert hdfs_api.read_data("/storage" + i) == i + "\tMark\t72.53\n" - assert node1.query("select count(*) from HDFSStorageWithRange") == "3\n" + assert ( + node1.query( + "select count(*) from HDFSStorageWithRange settings s3_throw_on_zero_files_match=1" + ) + == "3\n" + ) assert node1.query("select count(*) from HDFSStorageWithEnum") == "3\n" assert node1.query("select count(*) from HDFSStorageWithQuestionMark") == "3\n" assert node1.query("select count(*) from HDFSStorageWithAsterisk") == "3\n" @@ -159,7 +164,7 @@ def test_bad_hdfs_uri(started_cluster): ) except Exception as ex: print(ex) - assert "Unable to create builder to connect to HDFS" in str(ex) + assert "Unable to connect to HDFS" in str(ex) try: node1.query( @@ -321,7 +326,7 @@ def test_virtual_columns(started_cluster): hdfs_api.write_data("/file1", "1\n") hdfs_api.write_data("/file2", "2\n") hdfs_api.write_data("/file3", "3\n") - expected = "1\tfile1\thdfs://hdfs1:9000/file1\n2\tfile2\thdfs://hdfs1:9000/file2\n3\tfile3\thdfs://hdfs1:9000/file3\n" + expected = "1\tfile1\tfile1\n2\tfile2\tfile2\n3\tfile3\tfile3\n" assert ( node1.query( "select id, _file as file_name, _path as file_path from virtual_cols order by id" @@ -360,7 +365,12 @@ def test_truncate_table(started_cluster): assert hdfs_api.read_data("/tr") == "1\tMark\t72.53\n" assert node1.query("select * from test_truncate") == "1\tMark\t72.53\n" node1.query("truncate table test_truncate") - assert node1.query("select * from test_truncate") == "" + assert ( + node1.query( + "select * from test_truncate settings hdfs_ignore_file_doesnt_exist=1" + ) + == "" + ) node1.query("drop table test_truncate") @@ -483,13 +493,13 @@ def test_hdfsCluster(started_cluster): actual = node1.query( "select id, _file as file_name, _path as file_path from hdfs('hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') order by id" ) - expected = "1\tfile1\thdfs://hdfs1:9000/test_hdfsCluster/file1\n2\tfile2\thdfs://hdfs1:9000/test_hdfsCluster/file2\n3\tfile3\thdfs://hdfs1:9000/test_hdfsCluster/file3\n" + expected = "1\tfile1\ttest_hdfsCluster/file1\n2\tfile2\ttest_hdfsCluster/file2\n3\tfile3\ttest_hdfsCluster/file3\n" assert actual == expected actual = node1.query( "select id, _file as file_name, _path as file_path from hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') order by id" ) - expected = 
"1\tfile1\thdfs://hdfs1:9000/test_hdfsCluster/file1\n2\tfile2\thdfs://hdfs1:9000/test_hdfsCluster/file2\n3\tfile3\thdfs://hdfs1:9000/test_hdfsCluster/file3\n" + expected = "1\tfile1\ttest_hdfsCluster/file1\n2\tfile2\ttest_hdfsCluster/file2\n3\tfile3\ttest_hdfsCluster/file3\n" assert actual == expected fs.delete(dir, recursive=True) @@ -497,7 +507,9 @@ def test_hdfsCluster(started_cluster): def test_hdfs_directory_not_exist(started_cluster): ddl = "create table HDFSStorageWithNotExistDir (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/data/not_eixst', 'TSV')" node1.query(ddl) - assert "" == node1.query("select * from HDFSStorageWithNotExistDir") + assert "" == node1.query( + "select * from HDFSStorageWithNotExistDir settings hdfs_ignore_file_doesnt_exist=1" + ) def test_overwrite(started_cluster): @@ -653,7 +665,7 @@ def test_virtual_columns_2(started_cluster): node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") result = node1.query(f"SELECT _path FROM {table_function}") - assert result.strip() == "hdfs://hdfs1:9000/parquet_2" + assert result.strip() == "parquet_2" table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_3', 'Parquet', 'a Int32, _path String')" @@ -895,7 +907,7 @@ def test_hdfsCluster_unset_skip_unavailable_shards(started_cluster): assert ( node1.query( - "select * from hdfsCluster('cluster_non_existent_port', 'hdfs://hdfs1:9000/skip_unavailable_shards', 'TSV', 'id UInt64, text String, number Float64')" + "select * from hdfsCluster('cluster_non_existent_port', 'hdfs://hdfs1:9000/unskip_unavailable_shards', 'TSV', 'id UInt64, text String, number Float64')" ) == data ) @@ -966,37 +978,25 @@ def test_read_subcolumns(started_cluster): f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert ( - res - == "2\thdfs://hdfs1:9000/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" - ) + assert res == "2\ttest_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" res = node.query( f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert ( - res - == "2\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" - ) + assert res == "2\ttest_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" res = node.query( f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert ( - res - == "0\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" - ) + assert res == "0\ttest_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" res = node.query( f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" ) - assert ( - res - == "42\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" - ) + assert res == "42\ttest_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" def test_union_schema_inference_mode(started_cluster): diff --git a/tests/integration/test_storage_kerberized_hdfs/test.py b/tests/integration/test_storage_kerberized_hdfs/test.py index c72152fa376..ddfc1f6483d 100644 --- a/tests/integration/test_storage_kerberized_hdfs/test.py +++ b/tests/integration/test_storage_kerberized_hdfs/test.py @@ -130,7 +130,7 @@ def test_prohibited(started_cluster): assert False, 
"Exception have to be thrown" except Exception as ex: assert ( - "Unable to open HDFS file: /storage_user_two_prohibited error: Permission denied: user=specuser, access=WRITE" + "Unable to open HDFS file: /storage_user_two_prohibited (hdfs://suser@kerberizedhdfs1:9010/storage_user_two_prohibited) error: Permission denied: user=specuser, access=WRITE" in str(ex) ) diff --git a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py index 451e1ab2ccf..24d10d7ff83 100644 --- a/tests/integration/test_storage_kerberized_kafka/test.py +++ b/tests/integration/test_storage_kerberized_kafka/test.py @@ -5,7 +5,7 @@ import time import pytest import logging -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, is_arm from helpers.test_tools import TSV from helpers.client import QueryRuntimeException @@ -18,6 +18,10 @@ from kafka.protocol.admin import DescribeGroupsResponse_v1, DescribeGroupsReques from kafka.protocol.group import MemberAssignment import socket +if is_arm(): + # skip due to no arm support for clickhouse/kerberos-kdc docker image + pytestmark = pytest.mark.skip + cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( "instance", diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index dc929b7db46..09b27fff1e8 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1816,27 +1816,13 @@ def test_schema_inference_cache(started_cluster): check_cache(instance, []) run_describe_query(instance, files, storage_name, started_cluster, bucket) - check_cache_misses( - instance, - files, - storage_name, - started_cluster, - bucket, - 4 if storage_name == "url" else 1, - ) + check_cache_misses(instance, files, storage_name, started_cluster, bucket, 4) instance.query("system drop schema cache") check_cache(instance, []) run_describe_query(instance, files, storage_name, started_cluster, bucket) - check_cache_misses( - instance, - files, - storage_name, - started_cluster, - bucket, - 4 if storage_name == "url" else 1, - ) + check_cache_misses(instance, files, storage_name, started_cluster, bucket, 4) instance.query("system drop schema cache") diff --git a/tests/performance/function_tokens.xml b/tests/performance/function_tokens.xml index 63b72f83df3..1ff56323d62 100644 --- a/tests/performance/function_tokens.xml +++ b/tests/performance/function_tokens.xml @@ -1,3 +1,5 @@ with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByChar(' ', materialize(s)) as w from numbers(1000000) + with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp(' ', materialize(s)) as w from numbers(1000000) + with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' 
as s select splitByRegexp('\s+', materialize(s)) as w from numbers(100000) diff --git a/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.reference b/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.sql b/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.sql new file mode 100644 index 00000000000..78a58a979d1 --- /dev/null +++ b/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.sql @@ -0,0 +1,16 @@ +SET allow_experimental_analyzer = 1; + +-- https://github.com/ClickHouse/ClickHouse/issues/45804 + +CREATE TABLE myRMT( + key Int64, + someCol String, + ver DateTime +) ENGINE = ReplacingMergeTree(ver) +ORDER BY key as SELECT 1, 'test', '2020-01-01'; + +SELECT count(ver) FROM myRMT FINAL PREWHERE ver > '2000-01-01'; + +SELECT count() FROM myRMT FINAL PREWHERE ver > '2000-01-01'; + +DROP TABLE myRMT; diff --git a/tests/queries/0_stateless/00910_buffer_prewhere_different_types.sql b/tests/queries/0_stateless/00910_buffer_prewhere_different_types.sql index 8f305914cb8..702d9bb3e6c 100644 --- a/tests/queries/0_stateless/00910_buffer_prewhere_different_types.sql +++ b/tests/queries/0_stateless/00910_buffer_prewhere_different_types.sql @@ -2,8 +2,14 @@ DROP TABLE IF EXISTS buffer_table1__fuzz_28; DROP TABLE IF EXISTS merge_tree_table1; CREATE TABLE merge_tree_table1 (`x` UInt32) ENGINE = MergeTree ORDER BY x; + +CREATE TABLE buffer_table1__fuzz_24 (`s` Nullable(Int128), `x` Nullable(FixedString(17))) ENGINE = Buffer(currentDatabase(), 'merge_tree_table1', 16, 10, 60, 10, 1000, 1048576, 2097152); +SELECT s FROM buffer_table1__fuzz_24 PREWHERE factorial(toNullable(10)); -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } + INSERT INTO merge_tree_table1 VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +SELECT s FROM buffer_table1__fuzz_24 PREWHERE factorial(toNullable(10)); -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } + SET send_logs_level='error'; CREATE TABLE buffer_table1__fuzz_28 (`x` Nullable(UInt32)) ENGINE = Buffer(currentDatabase(), 'merge_tree_table1', 16, 10, 60, 10, 1000, 1048576, 2097152); diff --git a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.reference b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.reference new file mode 100644 index 00000000000..f572a3570f4 --- /dev/null +++ b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.reference @@ -0,0 +1,19 @@ +------------------- Distributed ------------------ +1 +---------- merge() over distributed -------------- +2 +---------- merge() over local -------------------- +1 +1 +1 +---------- remote() over Merge ------------------- +2 +---------- Distributed over Merge ---------------- +1 +---------- remote() over Merge ------------------- +2 +---------- Merge over Distributed ----------------- +1 +1 +1 +2 diff --git a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql new file mode 100644 index 00000000000..6b0dd4c8747 --- /dev/null +++ b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql @@ -0,0 +1,83 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/64211 +DROP TABLE 
IF EXISTS test_merge; +DROP TABLE IF EXISTS test_merge_distributed; +DROP TABLE IF EXISTS test_distributed_merge; +DROP TABLE IF EXISTS test_distributed; +DROP TABLE IF EXISTS test_local; +CREATE TABLE test_local (name String) +ENGINE = MergeTree +ORDER BY name as select 'x'; + +CREATE TABLE test_distributed as test_local +ENGINE = Distributed(test_shard_localhost, currentDatabase(), test_local); + +CREATE TABLE test_merge as test_local +ENGINE = Merge(currentDatabase(), 'test_local'); + +CREATE TABLE test_merge_distributed as test_local +ENGINE = Distributed(test_shard_localhost, currentDatabase(), test_merge); + +CREATE TABLE test_distributed_merge as test_local +ENGINE = Merge(currentDatabase(), 'test_distributed'); + +SELECT '------------------- Distributed ------------------'; +SELECT count() +FROM test_distributed +WHERE name GLOBAL IN (SELECT name FROM test_distributed); + +SELECT '---------- merge() over distributed --------------'; +SELECT count() +FROM merge(currentDatabase(), 'test_distributed') +WHERE name GLOBAL IN (SELECT name FROM test_distributed); + +SELECT '---------- merge() over local --------------------'; +SELECT count() +FROM merge(currentDatabase(), 'test_local') +WHERE name GLOBAL IN (SELECT name FROM test_distributed); + +SELECT count() +FROM merge(currentDatabase(), 'test_local') +WHERE name GLOBAL IN (SELECT name FROM merge(currentDatabase(), 'test_local')); + +SELECT count() +FROM merge(currentDatabase(), 'test_local') +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge)); + +SELECT '---------- remote() over Merge -------------------'; +SELECT count() +FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge) +WHERE name GLOBAL IN (SELECT name FROM test_distributed); + +SELECT '---------- Distributed over Merge ----------------'; +SELECT count() +FROM test_merge_distributed +WHERE name GLOBAL IN (SELECT name FROM test_merge_distributed); + +SELECT '---------- remote() over Merge -------------------'; +SELECT count() +FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge) +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge)); + +SELECT '---------- Merge over Distributed -----------------'; +SELECT count() +FROM test_distributed_merge +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge)); + +SELECT count() +FROM test_distributed_merge +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_distributed_merge)); + +SELECT count() +FROM test_distributed_merge +WHERE name GLOBAL IN (SELECT name FROM test_distributed_merge); + +SELECT count() +FROM remote('127.0.0.{1,2}', currentDatabase(), test_distributed_merge) +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge)); + + +DROP TABLE test_merge; +DROP TABLE test_merge_distributed; +DROP TABLE test_distributed_merge; +DROP TABLE test_distributed; +DROP TABLE test_local; diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index 67a2a70b509..1c1eb4489ee 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -11,7 +11,7 @@ REPLICA=$($CLICKHOUSE_CLIENT --query "Select getMacro('replica')") # Check that if we have one inactive replica and a huge number of INSERTs to active replicas, # the 
number of nodes in ZooKeeper does not grow unbounded. -SCALE=5000 +SCALE=1000 $CLICKHOUSE_CLIENT -n --query " DROP TABLE IF EXISTS r1; diff --git a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh index acb2550d48c..85fdf7ed764 100755 --- a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh +++ b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh @@ -11,14 +11,24 @@ CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS t" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE t (x Int8) ENGINE = MergeTree ORDER BY tuple()" -for _ in {1..100}; do - ${CLICKHOUSE_CLIENT} --query="INSERT INTO t VALUES (0)" - ${CLICKHOUSE_CLIENT} --query="INSERT INTO t VALUES (0)" - ${CLICKHOUSE_CLIENT} --query="OPTIMIZE TABLE t FINAL" 2>/dev/null & - ${CLICKHOUSE_CLIENT} --query="ALTER TABLE t DETACH PARTITION tuple()" - ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM t HAVING count() > 0" -done +function thread_ops() +{ + local TIMELIMIT=$((SECONDS+$1)) + local it=0 + while [ $SECONDS -lt "$TIMELIMIT" ] && [ $it -lt 100 ]; + do + it=$((it+1)) + ${CLICKHOUSE_CLIENT} --query="INSERT INTO t VALUES (0)" + ${CLICKHOUSE_CLIENT} --query="INSERT INTO t VALUES (0)" + ${CLICKHOUSE_CLIENT} --query="OPTIMIZE TABLE t FINAL" 2>/dev/null & + ${CLICKHOUSE_CLIENT} --query="ALTER TABLE t DETACH PARTITION tuple()" + ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM t HAVING count() > 0" + done +} +export -f thread_ops +TIMEOUT=60 +thread_ops $TIMEOUT & wait $CLICKHOUSE_CLIENT -q "DROP TABLE t" diff --git a/tests/queries/0_stateless/01866_split_by_regexp.reference b/tests/queries/0_stateless/01866_split_by_regexp.reference index a3ae2f35a5f..552d4d1f96a 100644 --- a/tests/queries/0_stateless/01866_split_by_regexp.reference +++ b/tests/queries/0_stateless/01866_split_by_regexp.reference @@ -5,3 +5,16 @@ ['gbye','bug'] [''] [] +Test fallback of splitByRegexp to splitByChar if regexp is trivial +['a','b','c'] +['a','b','c'] +['','','','','',''] +['a^b^c'] +['a$b$c'] +['a)b)c'] +['a','b','c'] +['a','b','c'] +['a','b','c'] +['a|b|c'] +['a\\b\\c'] +AST Fuzzer failure diff --git a/tests/queries/0_stateless/01866_split_by_regexp.sql b/tests/queries/0_stateless/01866_split_by_regexp.sql index e472fb68d94..bc25d3e1093 100644 --- a/tests/queries/0_stateless/01866_split_by_regexp.sql +++ b/tests/queries/0_stateless/01866_split_by_regexp.sql @@ -3,3 +3,23 @@ select splitByRegexp('', 'abcde'); select splitByRegexp('<[^<>]*>', x) from (select arrayJoin(['

hello

world

', 'gbyebug']) x); select splitByRegexp('ab', ''); select splitByRegexp('', ''); + +SELECT 'Test fallback of splitByRegexp to splitByChar if regexp is trivial'; +select splitByRegexp(' ', 'a b c'); +select splitByRegexp('-', 'a-b-c'); +select splitByRegexp('.', 'a.b.c'); +select splitByRegexp('^', 'a^b^c'); +select splitByRegexp('$', 'a$b$c'); +select splitByRegexp('+', 'a+b+c'); -- { serverError CANNOT_COMPILE_REGEXP } +select splitByRegexp('?', 'a?b?c'); -- { serverError CANNOT_COMPILE_REGEXP } +select splitByRegexp('(', 'a(b(c'); -- { serverError CANNOT_COMPILE_REGEXP } +select splitByRegexp(')', 'a)b)c'); +select splitByRegexp('[', 'a[b[c'); -- { serverError CANNOT_COMPILE_REGEXP } +select splitByRegexp(']', 'a]b]c'); +select splitByRegexp('{', 'a{b{c'); +select splitByRegexp('}', 'a}b}c'); +select splitByRegexp('|', 'a|b|c'); +select splitByRegexp('\\', 'a\\b\\c'); + +SELECT 'AST Fuzzer failure'; +SELECT splitByRegexp(materialize(1), NULL, 3) -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/02114_hdfs_bad_url.sh b/tests/queries/0_stateless/02114_hdfs_bad_url.sh index 22975dddf6f..5bd5610a9f0 100755 --- a/tests/queries/0_stateless/02114_hdfs_bad_url.sh +++ b/tests/queries/0_stateless/02114_hdfs_bad_url.sh @@ -23,4 +23,3 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs1:9000/data', 'CSV', 'x UInt32')" $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "HDFS_ERROR" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('http://hdfs1:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1@nameservice/abcd/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "HDFS_ERROR" && echo 'OK' || echo 'FAIL'; - diff --git a/tests/queries/0_stateless/02115_rewrite_local_join_right_distribute_table.sql b/tests/queries/0_stateless/02115_rewrite_local_join_right_distribute_table.sql index 2ab324df787..d5ab82ba064 100644 --- a/tests/queries/0_stateless/02115_rewrite_local_join_right_distribute_table.sql +++ b/tests/queries/0_stateless/02115_rewrite_local_join_right_distribute_table.sql @@ -23,10 +23,6 @@ select t1.* from t1_all t1 join t2_all t2 on t1.a = t2.a ORDER BY t1.a; SELECT '-'; --- make sure data is fully written when reading from distributed -optimize table t1_local final; -optimize table t2_local final; - set distributed_product_mode = 'global'; select * from t1_all t1 where t1.a in (select t2.a from t2_all t2); explain syntax select t1.* from t1_all t1 join t2_all t2 on t1.a = t2.a; diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks_replica.sh b/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks_replica.sh index 1c776263f78..0c95abb9867 100755 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks_replica.sh +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks_replica.sh @@ -9,6 +9,8 @@ INSERT_BLOCK_SETTINGS="max_insert_block_size=1&min_insert_block_size_rows=0&min_ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS block_dedup_token_replica SYNC" $CLICKHOUSE_CLIENT --query="CREATE TABLE block_dedup_token_replica (id Int32) ENGINE=ReplicatedMergeTree('/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{table}', '{replica}') ORDER BY id" +# Need to stop merges due to randomization of old_parts_lifetime setting, so all initial parts are guaranteed to exist when we check them +$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES 
block_dedup_token_replica" $CLICKHOUSE_CLIENT --query="SELECT 'insert 2 blocks with dedup token, 1 row per block'" DEDUP_TOKEN='dedup1' diff --git a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql index 8924627a717..26a201ec89f 100644 --- a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql +++ b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql @@ -1,16 +1,16 @@ -- Tags: long, no-parallel -SET insert_keeper_fault_injection_probability=0; -- to succeed this test can require too many retries due to 1024 partitions, so disable fault injections +SET insert_keeper_fault_injection_probability=0; -- to succeed this test can require too many retries due to 100 partitions, so disable fault injections -- regression for MEMORY_LIMIT_EXCEEDED error because of deferred final part flush drop table if exists data_02228; -create table data_02228 (key1 UInt32, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1) partition by key1 % 1024; -insert into data_02228 select number, 1, number from numbers_mt(100e3) settings max_memory_usage='300Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=0; -insert into data_02228 select number, 1, number from numbers_mt(100e3) settings max_memory_usage='300Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=10000000; -- { serverError MEMORY_LIMIT_EXCEEDED } +create table data_02228 (key1 UInt32, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1) partition by key1 % 100; +insert into data_02228 select number, 1, number from numbers_mt(10_000) settings max_memory_usage='30Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=0; +insert into data_02228 select number, 1, number from numbers_mt(10_000) settings max_memory_usage='30Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=1000000; -- { serverError MEMORY_LIMIT_EXCEEDED } drop table data_02228; drop table if exists data_rep_02228 SYNC; -create table data_rep_02228 (key1 UInt32, sign Int8, s UInt64) engine = ReplicatedCollapsingMergeTree('/clickhouse/{database}', 'r1', sign) order by (key1) partition by key1 % 1024; -insert into data_rep_02228 select number, 1, number from numbers_mt(100e3) settings max_memory_usage='300Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=0; -insert into data_rep_02228 select number, 1, number from numbers_mt(100e3) settings max_memory_usage='300Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=10000000; -- { serverError MEMORY_LIMIT_EXCEEDED } +create table data_rep_02228 (key1 UInt32, sign Int8, s UInt64) engine = ReplicatedCollapsingMergeTree('/clickhouse/{database}', 'r1', sign) order by (key1) partition by key1 % 100; +insert into data_rep_02228 select number, 1, number from numbers_mt(10_000) settings max_memory_usage='30Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=0; +insert into data_rep_02228 select number, 1, number from numbers_mt(10_000) settings max_memory_usage='30Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=1000000; -- { serverError MEMORY_LIMIT_EXCEEDED } drop table data_rep_02228 SYNC; diff --git a/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql 
b/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql index 245b2cc97e3..b2a04788bbb 100644 --- a/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql +++ b/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql @@ -61,6 +61,11 @@ CREATE TABLE github_events ) ENGINE = MergeTree ORDER BY (event_type, repo_name, created_at); -with top_repos as ( select repo_name from github_events where event_type = 'WatchEvent' and toDate(created_at) = today() - 1 group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toMonday(created_at) = toMonday(today() - interval 1 week) group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toYear(created_at) = toYear(today()) - 1 group by repo_name order by count() desc limit 100 ), last_day as ( select repo_name, count() as count_last_day, rowNumberInAllBlocks() + 1 as position_last_day from github_events where repo_name in (select repo_name from top_repos) and toDate(created_at) = today() - 1 group by repo_name order by count_last_day desc ), last_week as ( select repo_name, count() as count_last_week, rowNumberInAllBlocks() + 1 as position_last_week from github_events where repo_name in (select repo_name from top_repos) and toMonday(created_at) = toMonday(today()) - interval 1 week group by repo_name order by count_last_week desc ), last_month as ( select repo_name, count() as count_last_month, rowNumberInAllBlocks() + 1 as position_last_month from github_events where repo_name in (select repo_name from top_repos) and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count_last_month desc ) select d.repo_name, columns(count) from last_day d join last_week w on d.repo_name = w.repo_name join last_month m on d.repo_name = m.repo_name; +with + top_repos as ( select repo_name from github_events where event_type = 'WatchEvent' and toDate(created_at) = today() - 1 group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toMonday(created_at) = toMonday(today() - interval 1 week) group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toYear(created_at) = toYear(today()) - 1 group by repo_name order by count() desc limit 100 ), + last_day as ( select repo_name, count() as count_last_day, rowNumberInAllBlocks() + 1 as position_last_day from github_events where repo_name in (select repo_name from top_repos) and toDate(created_at) = today() - 1 group by repo_name order by count_last_day desc ), + last_week as ( select repo_name, count() as count_last_week, rowNumberInAllBlocks() + 1 as position_last_week from github_events where repo_name in (select repo_name from top_repos) and toMonday(created_at) = toMonday(today()) - interval 1 week group by repo_name order by count_last_week desc ), + last_month as ( select 
repo_name, count() as count_last_month, rowNumberInAllBlocks() + 1 as position_last_month from github_events where repo_name in (select repo_name from top_repos) and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count_last_month desc ) +select d.repo_name, columns('count') from last_day d join last_week w on d.repo_name = w.repo_name join last_month m on d.repo_name = m.repo_name; DROP TABLE github_events; diff --git a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql index 050b8e37722..f82f79dbe44 100644 --- a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql +++ b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql @@ -102,7 +102,7 @@ ALTER TABLE t_proj ADD PROJECTION p_1 (SELECT avg(a), avg(b), count()) SETTINGS INSERT INTO t_proj SELECT number + 1, number + 1 FROM numbers(1000); -DELETE FROM t_proj WHERE a < 100; -- { serverError BAD_ARGUMENTS } +DELETE FROM t_proj WHERE a < 100; -- { serverError NOT_IMPLEMENTED } SELECT avg(a), avg(b), count() FROM t_proj; diff --git a/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference b/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference index 3733d6b6084..e39cdce92b0 100644 --- a/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference +++ b/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference @@ -17,3 +17,4 @@ Alias conflict with identifier inside expression Alias setting prefer_column_name_to_alias 0 Value +/a/b/c diff --git a/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql b/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql index 52a1cd1dae8..467073fc4e8 100644 --- a/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql +++ b/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql @@ -48,3 +48,5 @@ WITH id AS value SELECT value FROM test_table; SET prefer_column_name_to_alias = 0; DROP TABLE test_table; + +WITH path('clickhouse.com/a/b/c') AS x SELECT x AS path; diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas.sql b/tests/queries/0_stateless/02343_analyzer_lambdas.sql index 0c257cf6f18..25928acb2c3 100644 --- a/tests/queries/0_stateless/02343_analyzer_lambdas.sql +++ b/tests/queries/0_stateless/02343_analyzer_lambdas.sql @@ -93,3 +93,11 @@ SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]), lambda2(tuple(x), x + 1), 1 DROP TABLE test_table_tuple; DROP TABLE test_table; + +WITH x -> (lambda(x) + 1) AS lambda +SELECT lambda(1); -- {serverError UNSUPPORTED_METHOD } + +WITH + x -> (lambda1(x) + 1) AS lambda, + lambda AS lambda1 +SELECT lambda(1); -- {serverError UNSUPPORTED_METHOD } diff --git a/tests/queries/0_stateless/02344_insert_profile_events_stress.sql b/tests/queries/0_stateless/02344_insert_profile_events_stress.sql index f9fdd3b943f..e9a790bea5d 100644 --- a/tests/queries/0_stateless/02344_insert_profile_events_stress.sql +++ b/tests/queries/0_stateless/02344_insert_profile_events_stress.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, long, no-debug, no-tsan +-- Tags: no-parallel, long, no-debug, no-tsan, no-msan, no-asan create table data_02344 (key Int) engine=Null; -- 3e9 rows is enough to fill the socket buffer and cause INSERT hung. 
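Note on the COLUMNS matcher change in 02271 above: a minimal illustrative sketch, not part of the patch, assuming a throwaway subquery and made-up column names. With the analyzer, the matcher takes its pattern as a regular-expression string literal, which is why the query was rewritten from columns(count) to columns('count'): the matcher then expands to every selected column whose name matches the pattern.

-- Hypothetical example only; subquery and column names are invented for illustration.
SELECT COLUMNS('count')
FROM (SELECT 1 AS count_last_day, 2 AS count_last_week, 3 AS count_last_month);
-- Expected to expand to count_last_day, count_last_week, count_last_month, i.e. one row: 1 2 3.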
diff --git a/tests/queries/0_stateless/02374_analyzer_array_join.reference b/tests/queries/0_stateless/02374_analyzer_array_join.reference index 6dd384c7d9c..ad7750228d6 100644 --- a/tests/queries/0_stateless/02374_analyzer_array_join.reference +++ b/tests/queries/0_stateless/02374_analyzer_array_join.reference @@ -45,7 +45,13 @@ SELECT id, value, value_1, value_2 FROM test_table ARRAY JOIN [[1, 2, 3]] AS val 0 Value [1,2,3] 1 0 Value [1,2,3] 2 0 Value [1,2,3] 3 -SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; -- { serverError 179 } +SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; +1 +1 +1 +1 +1 +1 SELECT 'ARRAY JOIN with column'; ARRAY JOIN with column SELECT id, value, test_table.value_array FROM test_table ARRAY JOIN value_array; @@ -84,7 +90,13 @@ SELECT id, value, value_array AS value_array_array_alias FROM test_table ARRAY J 0 Value [4,5,6] SELECT '--'; -- -SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; -- { serverError 179 } +SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; +0 +0 +0 +0 +0 +0 SELECT '--'; -- SELECT id, value, value_array AS value_array_array_alias, value_array_array_alias_element FROM test_table ARRAY JOIN value_array_array_alias AS value_array_array_alias_element; @@ -120,3 +132,7 @@ WHERE NOT ignore(elem) GROUP BY sum(ignore(ignore(ignore(1., 1, 36, 8, 8), ignore(52, 37, 37, '03147_parquet_memory_tracking.parquet', 37, 37, toUInt256(37), 37, 37, toNullable(37), 37, 37), 1., 1, 36, 8, 8), emptyArrayToSingle(arrayMap(x -> toString(x), arrayMap(x -> nullIf(x, 2), arrayJoin([[1]])))))) IGNORE NULLS, modulo(toLowCardinality('03147_parquet_memory_tracking.parquet'), number, toLowCardinality(3)); -- { serverError UNKNOWN_IDENTIFIER } +[1,2] 1 +[1,2] 2 +1 +2 diff --git a/tests/queries/0_stateless/02374_analyzer_array_join.sql b/tests/queries/0_stateless/02374_analyzer_array_join.sql index bc4bb6616c1..8c26df1806e 100644 --- a/tests/queries/0_stateless/02374_analyzer_array_join.sql +++ b/tests/queries/0_stateless/02374_analyzer_array_join.sql @@ -33,7 +33,7 @@ SELECT '--'; SELECT id, value, value_1, value_2 FROM test_table ARRAY JOIN [[1, 2, 3]] AS value_1 ARRAY JOIN value_1 AS value_2; -SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; -- { serverError 179 } +SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; SELECT 'ARRAY JOIN with column'; @@ -53,7 +53,7 @@ SELECT id, value, value_array AS value_array_array_alias FROM test_table ARRAY J SELECT '--'; -SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; -- { serverError 179 } +SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; SELECT '--'; @@ -80,3 +80,6 @@ GROUP BY -- { echoOff } DROP TABLE test_table; + +select [1, 2] as arr, x from system.one array join arr as x; +select x + 1 as x from (select [number] as arr from numbers(2)) as s array join arr as x; diff --git a/tests/queries/0_stateless/02494_query_cache_key.reference b/tests/queries/0_stateless/02494_query_cache_key.reference new file mode 100644 index 00000000000..8f5b61192d5 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_key.reference @@ -0,0 +1,6 @@ +Test (1) +1 +2 +Test (2) +4 +4 diff --git a/tests/queries/0_stateless/02494_query_cache_key.sql b/tests/queries/0_stateless/02494_query_cache_key.sql new file mode 100644 index 00000000000..d8c68e0d267 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_key.sql @@ -0,0 +1,70 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +-- Tests 
that the key of the query cache is not only formed by the query AST but also by +-- (1) the current database (`USE db`, issue #64136), +-- (2) the query settings + + +SELECT 'Test (1)'; + +SYSTEM DROP QUERY CACHE; + +DROP DATABASE IF EXISTS db1; +DROP DATABASE IF EXISTS db2; + +CREATE DATABASE db1; +CREATE DATABASE db2; + +CREATE TABLE db1.tab(a UInt64, PRIMARY KEY a); +CREATE TABLE db2.tab(a UInt64, PRIMARY KEY a); + +INSERT INTO db1.tab values(1); +INSERT INTO db2.tab values(2); + +USE db1; +SELECT * FROM tab SETTINGS use_query_cache=1; + +USE db2; +SELECT * FROM tab SETTINGS use_query_cache=1; + +DROP DATABASE db1; +DROP DATABASE db2; + +SYSTEM DROP QUERY CACHE; + + +SELECT 'Test (2)'; + +-- test with query-level settings +SELECT 1 SETTINGS use_query_cache = 1, limit = 1, use_skip_indexes = 0 Format Null; +SELECT 1 SETTINGS use_query_cache = 1, use_skip_indexes = 0 Format Null; +SELECT 1 SETTINGS use_query_cache = 1, use_skip_indexes = 1 Format Null; +SELECT 1 SETTINGS use_query_cache = 1, max_block_size = 1 Format Null; + +-- 4x the same query but with different settings each. This should yield four entries in the query cache. +SELECT count(query) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; + +-- test with mixed session-level/query-level settings +SET use_query_cache = 1; +SET limit = 1; +SELECT 1 SETTINGS use_skip_indexes = 0 Format Null; +SET limit = default; +SET use_skip_indexes = 0; +SELECT 1 Format Null; +SET use_skip_indexes = 1; +SELECT 1 SETTINGS use_skip_indexes = 1 Format Null; +SET use_skip_indexes = default; +SET max_block_size = 1; +SELECT 1 Format Null; +SET max_block_size = default; + +SET use_query_cache = default; + +-- 4x the same query but with different settings each. This should yield four entries in the query cache. +SELECT count(query) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; + diff --git a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference index 389e2621455..9ec033cefb1 100644 --- a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference +++ b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference @@ -1,2 +1,4 @@ 2 0 +1 +0 diff --git a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh index 8712c7c84c6..6bc3d03ac66 100755 --- a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh +++ b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh @@ -15,11 +15,17 @@ ${CLICKHOUSE_CLIENT} --query "CREATE TABLE tab (a UInt64) ENGINE=MergeTree() ORD ${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (1) (2) (3)" ${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (3) (4) (5)" -SETTINGS="SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" +SETTINGS_NO_ANALYZER="SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" +SETTINGS_ANALYZER="SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" # Verify that the first query does two aggregations and the second query zero aggregations. Since query cache is currently not integrated # with EXPLAIN PLAN, we need to check the logs. 
-${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS" 2>&1 | grep "Aggregated. " | wc -l -${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS" 2>&1 | grep "Aggregated. " | wc -l +${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_NO_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l +${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_NO_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l + +${CLICKHOUSE_CLIENT} --query "SYSTEM DROP QUERY CACHE" + +${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l +${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP QUERY CACHE" diff --git a/tests/queries/0_stateless/02521_analyzer_array_join_crash.reference b/tests/queries/0_stateless/02521_analyzer_array_join_crash.reference index 5e7728e0590..426cfe35e73 100644 --- a/tests/queries/0_stateless/02521_analyzer_array_join_crash.reference +++ b/tests/queries/0_stateless/02521_analyzer_array_join_crash.reference @@ -1,11 +1,10 @@ -- { echoOn } -SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value; -0 [1,2,3] [1,2,3] +SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value; -- { serverError UNKNOWN_IDENTIFIER } SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element ARRAY JOIN value_element AS value; 0 [1,2,3] 1 0 [1,2,3] 2 0 [1,2,3] 3 -SELECT value_element, value FROM test_table ARRAY JOIN [1048577] AS value_element, arrayMap(x -> value_element, ['']) AS value; -1048577 [1048577] -SELECT arrayFilter(x -> notEmpty(concat(x)), [NULL, NULL]) FROM system.one ARRAY JOIN [1048577] AS elem, arrayMap(x -> splitByChar(x, elem), ['']) AS unused; -- { serverError 44 } +SELECT value_element, value FROM test_table ARRAY JOIN [1048577] AS value_element ARRAY JOIN arrayMap(x -> value_element, ['']) AS value; +1048577 1048577 +SELECT arrayFilter(x -> notEmpty(concat(x)), [NULL, NULL]) FROM system.one ARRAY JOIN [1048577] AS elem ARRAY JOIN arrayMap(x -> splitByChar(x, elem), ['']) AS unused; -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/02521_analyzer_array_join_crash.sql b/tests/queries/0_stateless/02521_analyzer_array_join_crash.sql index 53606e01ab7..7842d47d757 100644 --- a/tests/queries/0_stateless/02521_analyzer_array_join_crash.sql +++ b/tests/queries/0_stateless/02521_analyzer_array_join_crash.sql @@ -11,13 +11,13 @@ INSERT INTO test_table VALUES (0, 'Value'); -- { echoOn } -SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value; +SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value; -- { serverError UNKNOWN_IDENTIFIER } SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element ARRAY JOIN value_element AS value; -SELECT value_element, value FROM test_table ARRAY JOIN [1048577] AS value_element, arrayMap(x -> value_element, ['']) AS value; +SELECT value_element, value FROM test_table ARRAY JOIN [1048577] AS 
value_element ARRAY JOIN arrayMap(x -> value_element, ['']) AS value; -SELECT arrayFilter(x -> notEmpty(concat(x)), [NULL, NULL]) FROM system.one ARRAY JOIN [1048577] AS elem, arrayMap(x -> splitByChar(x, elem), ['']) AS unused; -- { serverError 44 } +SELECT arrayFilter(x -> notEmpty(concat(x)), [NULL, NULL]) FROM system.one ARRAY JOIN [1048577] AS elem ARRAY JOIN arrayMap(x -> splitByChar(x, elem), ['']) AS unused; -- { serverError ILLEGAL_COLUMN } -- { echoOff } diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index e3fae07333a..3f478218ff1 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -236,3 +236,6 @@ Check asan bug 0 Check bug found fuzzing 9042C6691B1A75F0EA3314B6F55728BB +Check bug 2 found fuzzing +608E1FF030C9E206185B112C2A25F1A7 +ABB65AE97711A2E053E324ED88B1D08B diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index 112ae15bf46..fb707109c83 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -338,3 +338,10 @@ SELECT sipHash128((toUInt64(9223372036854775806), 1)) = sipHash128(1) GROUP BY s SELECT 'Check bug found fuzzing'; SELECT [(255, 1048575)], sipHash128ReferenceKeyed((toUInt64(2147483646), toUInt64(9223372036854775807)), ([(NULL, 100), (NULL, NULL), (1024, 10)], toUInt64(2), toUInt64(1024)), ''), hex(sipHash128ReferenceKeyed((-9223372036854775807, 1.), '-1', NULL)), ('', toUInt64(65535), [(9223372036854775807, 9223372036854775806)], toUInt64(65536)), arrayJoin((NULL, 65537, 255), [(NULL, NULL)]) GROUP BY tupleElement((NULL, NULL, NULL, -1), toUInt64(2), 2) = NULL; -- { serverError NOT_IMPLEMENTED } SELECT hex(sipHash128ReferenceKeyed((0::UInt64, 0::UInt64), ([1, 1]))); + +SELECT 'Check bug 2 found fuzzing'; +DROP TABLE IF EXISTS sipHashKeyed_keys; +CREATE TABLE sipHashKeyed_keys (`a` Map(String, String)) ENGINE = Memory; +INSERT INTO sipHashKeyed_keys FORMAT VALUES ({'a':'b', 'c':'d'}), ({'e':'f', 'g':'h'}); +SELECT hex(sipHash128ReferenceKeyed((0::UInt64, materialize(0::UInt64)), a)) FROM sipHashKeyed_keys ORDER BY a; +DROP TABLE sipHashKeyed_keys; diff --git a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh index d831c7d9806..a34a480a078 100755 --- a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh +++ b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh @@ -13,7 +13,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -nm -q " INSERT INTO FUNCTION s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv', '', '', 'TSV') SELECT repeat('a', 1024) FROM numbers((pow(2, 30) * 2) / 1024) - SETTINGS s3_max_single_part_upload_size = '10Gi'; + SETTINGS s3_max_single_part_upload_size = '5Gi'; SELECT count() FROM s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv'); " diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index d62f928e947..1eb22976b84 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -58,9 +58,8 @@ SELECT * FROM \"abacaba/file.tsv\" """ 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" ${CLICKHOUSE_CLIENT} -q "SELECT * FROM test_hdfs_4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e 
"UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: - +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "The data format cannot be detected" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "The table structure cannot be extracted" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: diff --git a/tests/queries/0_stateless/02735_parquet_encoder.sql b/tests/queries/0_stateless/02735_parquet_encoder.sql index fe45a2a317d..9320d0e57c3 100644 --- a/tests/queries/0_stateless/02735_parquet_encoder.sql +++ b/tests/queries/0_stateless/02735_parquet_encoder.sql @@ -41,7 +41,7 @@ create temporary table basic_types_02735 as select * from generateRandom(' decimal128 Decimal128(20), decimal256 Decimal256(40), ipv4 IPv4, - ipv6 IPv6') limit 10101; + ipv6 IPv6') limit 1011; insert into function file(basic_types_02735.parquet) select * from basic_types_02735; desc file(basic_types_02735.parquet); select (select sum(cityHash64(*)) from basic_types_02735) - (select sum(cityHash64(*)) from file(basic_types_02735.parquet)); @@ -59,7 +59,7 @@ create temporary table nullables_02735 as select * from generateRandom(' fstr Nullable(FixedString(12)), i256 Nullable(Int256), decimal256 Nullable(Decimal256(40)), - ipv6 Nullable(IPv6)') limit 10000; + ipv6 Nullable(IPv6)') limit 1000; insert into function file(nullables_02735.parquet) select * from nullables_02735; select (select sum(cityHash64(*)) from nullables_02735) - (select sum(cityHash64(*)) from file(nullables_02735.parquet)); drop table nullables_02735; @@ -83,7 +83,7 @@ create table arrays_02735 engine = Memory as select * from generateRandom(' decimal64 Array(Decimal64(10)), ipv4 Array(IPv4), msi Map(String, Int16), - tup Tuple(FixedString(3), Array(String), Map(Int8, Date))') limit 10000; + tup Tuple(FixedString(3), Array(String), Map(Int8, Date))') limit 1000; insert into function file(arrays_02735.parquet) select * from arrays_02735; create temporary table arrays_out_02735 as arrays_02735; insert into arrays_out_02735 select * from file(arrays_02735.parquet); @@ -107,7 +107,7 @@ create temporary table madness_02735 as select * from generateRandom(' mln Map(LowCardinality(String), Nullable(Int8)), t Tuple(Map(FixedString(5), Tuple(Array(UInt16), Nullable(UInt16), Array(Tuple(Int8, Decimal64(10))))), Tuple(kitchen UInt64, sink String)), n Nested(hello UInt64, world Tuple(first String, second FixedString(1))) - ') limit 10000; + ') limit 1000; insert into function file(madness_02735.parquet) select * from madness_02735; insert into function file(a.csv) select * from madness_02735 order by tuple(*); insert into function file(b.csv) select aa, aaa, an, aan, l, 
ln, arrayMap(x->reinterpret(x, 'UInt128'), al) as al_, aaln, mln, t, n.hello, n.world from file(madness_02735.parquet) order by tuple(aa, aaa, an, aan, l, ln, al_, aaln, mln, t, n.hello, n.world); diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.sql b/tests/queries/0_stateless/02792_drop_projection_lwd.sql index a1d8a9c90f3..dcde7dcc600 100644 --- a/tests/queries/0_stateless/02792_drop_projection_lwd.sql +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.sql @@ -7,7 +7,7 @@ CREATE TABLE t_projections_lwd (a UInt32, b UInt32, PROJECTION p (SELECT * ORDER INSERT INTO t_projections_lwd SELECT number, number FROM numbers(100); -- LWD does not work, as expected -DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError BAD_ARGUMENTS } +DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError NOT_IMPLEMENTED } KILL MUTATION WHERE database = currentDatabase() AND table = 't_projections_lwd' SYNC FORMAT Null; -- drop projection diff --git a/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql index da76a5cb88f..1e99eb8b83d 100644 --- a/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql +++ b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql @@ -2,5 +2,4 @@ select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/BU%20-%20UNIT%20-%201/*.parquet'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } -select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/*.parquet?some_tocken=ABCD'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } - +select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/*.parquet?some_tocken=ABCD'); -- { serverError CANNOT_DETECT_FORMAT } diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference index 9ba927fa201..0589fdeef04 100644 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference @@ -24,6 +24,9 @@ OK 2 OK OK +OK +100 +100 ===== TestGrants ===== OK OK diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index 9c9df120298..f32aee44bee 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -159,6 +159,45 @@ ${CLICKHOUSE_CLIENT} --query "REVOKE SELECT ON $db.test_table FROM $user1" (( $(${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_mv_4" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" (( $(${CLICKHOUSE_CLIENT} --query "INSERT INTO $db.test_table VALUES ('foo'), ('bar');" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" +${CLICKHOUSE_CLIENT} --multiquery <&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" +${CLICKHOUSE_CLIENT} --query "GRANT INSERT ON $db.source TO $user2" +${CLICKHOUSE_CLIENT} --user $user2 --query "INSERT INTO source SELECT * FROM generateRandom() LIMIT 100" + +${CLICKHOUSE_CLIENT} --query "SELECT count() FROM destination1" +${CLICKHOUSE_CLIENT} --query "SELECT count() FROM destination2" echo "===== TestGrants 
=====" ${CLICKHOUSE_CLIENT} --query "GRANT CREATE ON *.* TO $user1" @@ -192,7 +231,6 @@ ${CLICKHOUSE_CLIENT} --user $user1 --query " ${CLICKHOUSE_CLIENT} --query "GRANT SET DEFINER ON $user2 TO $user1" - echo "===== TestRowPolicy =====" ${CLICKHOUSE_CLIENT} --multiquery < + +Akiba_Hebrew_Academy 2017-08-01 241 +Aegithina_tiphia 2018-02-01 34 +1971-72_Utah_Stars_season 2016-10-01 1 + +<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 --> + +Akiba_Hebrew_Academy 2017-08-01 241 +Aegithina_tiphia 2018-02-01 34 +1971-72_Utah_Stars_season 2016-10-01 1 diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh new file mode 100755 index 00000000000..14f28f1ba4a --- /dev/null +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Data preparation step +USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +UNIX_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" +DOS_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" +DATA_FILE_UNIX_ENDINGS="${USER_FILES_PATH:?}/${UNIX_ENDINGS}" +DATA_FILE_DOS_ENDINGS="${USER_FILES_PATH:?}/${DOS_ENDINGS}" + +touch $DATA_FILE_UNIX_ENDINGS +touch $DATA_FILE_DOS_ENDINGS + +echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\nAegithina_tiphia\t2018-02-01\t34\n1971-72_Utah_Stars_season\t2016-10-01\t1\n" > $DATA_FILE_UNIX_ENDINGS +echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\r\nAegithina_tiphia\t2018-02-01\t34\r\n1971-72_Utah_Stars_season\t2016-10-01\t1\r\n" > $DATA_FILE_DOS_ENDINGS + +echo -e "<-- Read UNIX endings -->\n" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${UNIX_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" +$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(${DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" + +echo -e "\n<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 -->\n" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" + +# Test teardown +rm $DATA_FILE_UNIX_ENDINGS +rm $DATA_FILE_DOS_ENDINGS diff --git a/tests/queries/0_stateless/02998_native_parquet_reader.reference b/tests/queries/0_stateless/02998_native_parquet_reader.reference new file mode 100644 index 00000000000..38dd9f02b8b --- /dev/null +++ b/tests/queries/0_stateless/02998_native_parquet_reader.reference @@ -0,0 +1,2000 @@ +103002316 1646595280 hsn dxj wrm 1987-05-04 19:24:06.618814000 1938-12-29 14:03:44.995783000 -10371879330867684414581450918534810.916 +1548158706 -1216519640 rdx xsn sey 1942-04-24 07:22:01.629877000 1963-05-31 09:22:00.597388000 9453586064049253908450649688266944.965 +-1409329494 215463808 ytf idx grm 1964-04-02 15:32:10.118860000 1987-11-29 12:21:52.464881000 20816451926961221076776482452655249.999 +-1328718984 215070586 toj ykf sny 1962-12-25 19:25:23.099217000 1987-09-25 08:35:27.030011000 3827321890837346905750385640207228.295 +116634012 1646857428 eyt toa fau 1991-01-19 01:53:43.664105000 1939-01-24 15:34:19.169731000 -18887700638539487859059986313378243.468 +1454047574 -1216257492 ugb eyk sni 1976-06-16 
10:05:27.546241000 1963-07-22 12:23:08.945284000 13732428483989527846770435107306259.709 +1534658084 -1216650714 pkv lgb oau 1938-08-21 01:37:41.671559000 1963-05-18 08:36:43.510414000 18010946380424307642205251259343751.221 +-1315087288 215332734 aug pbv bvh 1966-09-11 01:55:00.144508000 1987-10-21 10:06:01.203959000 16579148515724197427732763209849308.959 +22522880 214677364 hcw alg oju 1952-02-07 10:06:44.026644000 1939-03-17 18:35:27.517627000 -14608858218599213920740200894338928.724 +103133390 1646726354 lwr hcn bvq 1987-05-17 20:09:23.705788000 1939-01-11 14:49:02.082757000 -10330340322164434125305384742301437.212 +1548289780 -1216388566 vhc cwr gbv 1942-05-07 08:07:18.716851000 1963-06-13 10:07:17.684362000 9495125072752504197726715864500318.669 +-1409198420 -1217043936 dxj mhc kvq \N 1987-12-12 13:07:09.551855000 20857990935664471366052548628888623.703 +-1328587910 215201660 xsn doj wrd 1964-04-15 16:17:27.205834000 1987-10-08 09:20:44.116985000 3868860899540597195026451816440601.999 +116765086 1646988502 idx xse cwi 1963-01-07 20:10:40.186191000 1939-02-06 16:19:36.256705000 -18846161629836237569783920137144869.764 +1454178648 1646333132 ykf ido wrm 1991-02-01 02:39:00.751079000 1938-12-29 14:03:44.995783000 13773967492692778136046501283539633.413 +1534789158 -1216519640 toa pkf sey 1976-06-29 10:50:44.633215000 1963-05-31 09:22:00.597388000 18052485389127557931481317435577124.925 +-1314956214 215463808 eyk tfa xje 1938-09-03 02:22:58.758533000 1987-11-03 10:51:18.290933000 16620687524427447717008829386082682.663 +22653954 214808438 lgb epk sny 1966-09-24 02:40:17.231482000 1987-09-25 08:35:27.030011000 -14567319209895963631464134718105555.02 +103264464 1646857428 pbv lgr fau 1952-02-20 10:52:01.113618000 1939-01-24 15:34:19.169731000 -10288801313461183836029318566068063.508 +1548420854 -1216257492 alg gbv kfa 1987-05-30 20:54:40.792762000 1963-06-26 10:52:34.771336000 9536664081455754487002782040733692.373 +-1409067346 -1216912862 hcn qlg oau 1942-05-20 08:52:35.803825000 1963-05-18 08:36:43.510414000 20899529944367721655328614805121997.407 +-1328456836 215332734 cwr hsn bvh 1964-04-28 17:02:44.292808000 1987-10-21 10:06:01.203959000 3910399908243847484302517992673975.703 +116896160 214677364 mhc cwi gbm 1963-01-20 20:55:57.273165000 1939-02-19 17:04:53.343679000 -18804622621132987280507853960911496.06 +1454309722 1646464206 doj mhs bvq 1991-02-14 03:24:17.838053000 1939-01-11 14:49:02.082757000 13815506501396028425322567459773007.117 +1534920232 -1216388566 xse toj gbv 1976-07-12 11:36:01.720189000 1963-06-13 10:07:17.684362000 18094024397830808220757383611810498.629 +-1314825140 -1217043936 ido xje cni 1938-09-16 03:08:15.845507000 1987-11-16 11:36:35.377907000 16662226533130698006284895562316056.367 +22785028 214939512 pkf ito wrd 1966-10-07 03:25:34.318456000 1987-10-08 09:20:44.116985000 -14525780201192713342188068541872181.316 +103395538 1646988502 tfa pkv cwi 1952-03-04 11:37:18.200592000 1939-02-06 16:19:36.256705000 -10247262304757933546753252389834689.804 +1548551928 1646333132 epk kfa oje 1987-06-12 21:39:57.879736000 1963-07-09 11:37:51.858310000 9578203090159004776278848216967066.077 +-1408936272 -1216781788 lgr upk sey 1942-06-02 09:37:52.890799000 1963-05-31 09:22:00.597388000 20941068953070971944604680981355371.111 +-1328325762 215463808 gbv lwr xje 1964-05-11 17:48:01.379782000 1987-11-03 10:51:18.290933000 3951938916947097773578584168907349.407 +117027234 214808438 qlg gbm kfq 1963-02-02 21:41:14.360139000 1939-03-04 17:50:10.430653000 
-18763083612429736991231787784678122.356 +1454440796 1646595280 hsn qlw fau 1991-02-27 04:09:34.925027000 1939-01-24 15:34:19.169731000 13857045510099278714598633636006380.821 +1535051306 -1216257492 cwi xsn kfa 1976-07-25 12:21:18.807163000 1963-06-26 10:52:34.771336000 18135563406534058510033449788043872.333 +-1314694066 -1216912862 mhs cni grm 1938-09-29 03:53:32.932481000 1987-11-29 12:21:52.464881000 16703765541833948295560961738549430.071 +22916102 215070586 toj mxs bvh 1966-10-20 04:10:51.405430000 1987-10-21 10:06:01.203959000 -14484241192489463052912002365638807.612 +103526612 214677364 xje toa gbm 1952-03-17 12:22:35.287566000 1939-02-19 17:04:53.343679000 -10205723296054683257477186213601316.1 +1548683002 1646464206 ito oje sni 1987-06-25 22:25:14.966710000 1963-07-22 12:23:08.945284000 9619742098862255065554914393200439.781 +-1408805198 -1216650714 pkv yto gbv 1942-06-15 10:23:09.977773000 1963-06-13 10:07:17.684362000 20982607961774222233880747157588744.815 +-1328194688 -1217043936 kfa pbv cni 1964-05-24 18:33:18.466755000 1987-11-16 11:36:35.377907000 3993477925650348062854650345140723.111 +117158308 214939512 upk kfq oju 1963-02-15 22:26:31.447112000 1939-03-17 18:35:27.517627000 -18721544603726486701955721608444748.652 +1454571870 1646726354 lwr upb cwi 1991-03-12 04:54:52.012001000 1939-02-06 16:19:36.256705000 13898584518802529003874699812239754.525 +1535182380 1646333132 gbm cwr oje 1976-08-07 13:06:35.894137000 1963-07-09 11:37:51.858310000 18177102415237308799309515964277246.037 +-1314562992 -1216781788 qlw grm kvq 1938-10-12 04:38:50.019455000 1987-12-12 13:07:09.551855000 16745304550537198584837027914782803.775 +23047176 215201660 xsn qcw xje 1966-11-02 04:56:08.492404000 1987-11-03 10:51:18.290933000 -14442702183786212763635936189405433.908 +103657686 214808438 cni xse kfq 1952-03-30 13:07:52.374540000 1939-03-04 17:50:10.430653000 -10164184287351432968201120037367942.396 +1548814076 1646595280 mxs sni wrm 1987-07-08 23:10:32.053684000 1938-12-29 14:03:44.995783000 9661281107565505354830980569433813.485 +-1408674124 -1216519640 toa dxs kfa 1942-06-28 11:08:27.064746000 1963-06-26 10:52:34.771336000 21024146970477472523156813333822118.519 +-1328063614 -1216912862 oje tfa grm 1964-06-06 19:18:35.553729000 1987-11-29 12:21:52.464881000 4035016934353598352130716521374096.815 +117289382 215070586 yto oju sny 1963-02-28 23:11:48.534086000 1987-09-25 08:35:27.030011000 -18680005595023236412679655432211374.948 +1454702944 1646857428 pbv ytf gbm 1991-03-25 05:40:09.098975000 1939-02-19 17:04:53.343679000 13940123527505779293150765988473128.229 +1535313454 1646464206 kfq gbv sni 1976-08-20 13:51:52.981111000 1963-07-22 12:23:08.945284000 18218641423940559088585582140510619.741 +-1314431918 -1216650714 upb \N oau 1938-10-25 05:24:07.106429000 1963-05-18 08:36:43.510414000 16786843559240448874113094091016177.479 +23178250 215332734 cwr kvq cni 1966-11-15 05:41:25.579378000 1987-11-16 11:36:35.377907000 -14401163175082962474359870013172060.204 +103788760 214939512 grm ugb oju 1952-04-12 13:53:09.461514000 1939-03-17 18:35:27.517627000 -10122645278648182678925053861134568.692 +1548945150 1646726354 qcw cwi bvq 1987-07-21 23:55:49.140658000 1939-01-11 14:49:02.082757000 9702820116268755644107046745667187.189 +-1408543050 -1216388566 xse wrm oje 1942-07-11 11:53:44.151720000 1963-07-09 11:37:51.858310000 21065685979180722812432879510055492.223 +-1327932540 -1216781788 sni hcw kvq 1964-06-19 20:03:52.640703000 1987-12-12 13:07:09.551855000 4076555943056848641406782697607470.519 
+117420456 215201660 dxs xje wrd 1963-03-13 23:57:05.621060000 1987-10-08 09:20:44.116985000 -18638466586319986123403589255978001.244 +1454834018 1646988502 tfa sny kfq 1991-04-07 06:25:26.185949000 1939-03-04 17:50:10.430653000 13981662536209029582426832164706501.933 +1535444528 1646595280 oju dxj wrm 1976-09-02 14:37:10.068085000 1938-12-29 14:03:44.995783000 18260180432643809377861648316743993.445 +-1314300844 -1216519640 ytf kfa sey 1938-11-07 06:09:24.193403000 1963-05-31 09:22:00.597388000 16828382567943699163389160267249551.183 +23309324 215463808 gbv oau grm 1966-11-28 06:26:42.666352000 1987-11-29 12:21:52.464881000 -14359624166379712185083803836938686.5 +103919834 215070586 kvq ykf sny 1952-04-25 14:38:26.548488000 1987-09-25 08:35:27.030011000 -10081106269944932389648987684901194.988 +1549076224 1646857428 ugb gbm fau 1987-08-04 00:41:06.227632000 1939-01-24 15:34:19.169731000 9744359124972005933383112921900560.893 +-1408411976 -1216257492 cwi bvq sni 1942-07-24 12:39:01.238694000 1963-07-22 12:23:08.945284000 21107224987883973101708945686288865.927 +-1327801466 -1216650714 wrm lgb oau 1964-07-02 20:49:09.727677000 1963-05-18 08:36:43.510414000 4118094951760098930682848873840844.223 +117551530 215332734 hcw cni bvh 1963-03-27 00:42:22.708034000 1987-10-21 10:06:01.203959000 -18596927577616735834127523079744627.54 +1454965092 214677364 xje wrd oju 1991-04-20 07:10:43.272923000 1939-03-17 18:35:27.517627000 14023201544912279871702898340939875.637 +1535575602 1646726354 sny hcn bvq 1976-09-15 15:22:27.155059000 1939-01-11 14:49:02.082757000 18301719441347059667137714492977367.149 +-1314169770 -1216388566 dxj oje gbv 1938-11-20 06:54:41.280377000 1963-06-13 10:07:17.684362000 16869921576646949452665226443482924.887 +23440398 -1217043936 kfa sey kvq 1966-12-11 07:11:59.753326000 1987-12-12 13:07:09.551855000 -14318085157676461895807737660705312.796 +104050908 215201660 oau doj wrd 1952-05-08 15:23:43.635462000 1987-10-08 09:20:44.116985000 -10039567261241682100372921508667821.284 +1549207298 1646988502 ykf kfq cwi 1987-08-17 01:26:23.314606000 1939-02-06 16:19:36.256705000 9785898133675256222659179098133934.597 +-1408280902 1646333132 gbm fau wrm 1942-08-06 13:24:18.325668000 1938-12-29 14:03:44.995783000 21148763996587223390985011862522239.631 +-1327670392 -1216519640 bvq pkf sey 1964-07-15 21:34:26.814651000 1963-05-31 09:22:00.597388000 4159633960463349219958915050074217.927 +117682604 215463808 lgb grm xje 1963-04-09 01:27:39.795008000 1987-11-03 10:51:18.290933000 -18555388568913485544851456903511253.836 +1455096166 214808438 cni bvh sny 1991-05-03 07:56:00.359897000 1987-09-25 08:35:27.030011000 14064740553615530160978964517173249.341 +1535706676 1646857428 wrd lgr fau 1976-09-28 16:07:44.242033000 1939-01-24 15:34:19.169731000 18343258450050309956413780669210740.853 +-1314038696 -1216257492 hcn sni kfa 1938-12-03 07:39:58.367351000 1963-06-26 10:52:34.771336000 16911460585350199741941292619716298.591 +23571472 -1216912862 oje wid oau 1966-12-24 07:57:16.840300000 1963-05-18 08:36:43.510414000 -14276546148973211606531671484471939.092 +104181982 215332734 sey hsn bvh 1952-05-21 16:09:00.722436000 1987-10-21 10:06:01.203959000 -9998028252538431811096855332434447.58 +1549338372 214677364 doj oju gbm 1987-08-30 02:11:40.401579000 1939-02-19 17:04:53.343679000 9827437142378506511935245274367308.301 +-1408149828 1646464206 kfq jey bvq 1942-08-19 14:09:35.412642000 1939-01-11 14:49:02.082757000 21190303005290473680261078038755613.335 +-1327539318 -1216388566 fau toj gbv 1964-07-28 
22:19:43.901625000 1963-06-13 10:07:17.684362000 4201172969166599509234981226307591.631 +117813678 -1217043936 pkf kvq cni 1963-04-22 02:12:56.881982000 1987-11-16 11:36:35.377907000 -18513849560210235255575390727277880.132 +1455227240 214939512 grm fal wrd 1991-05-16 08:41:17.446871000 1987-10-08 09:20:44.116985000 14106279562318780450255030693406623.045 +1535837750 1646988502 bvh pkv cwi 1976-10-11 16:53:01.329007000 1939-02-06 16:19:36.256705000 18384797458753560245689846845444114.557 +-1313907622 1646333132 lgr wrm oje 1938-12-16 08:25:15.454325000 1963-07-09 11:37:51.858310000 16952999594053450031217358795949672.295 +23702546 -1216781788 sni \N sey 1967-01-06 08:42:33.927274000 1963-05-31 09:22:00.597388000 -14235007140269961317255605308238565.388 +104313056 215463808 wid bmh xje 1952-06-03 16:54:17.809410000 1987-11-03 10:51:18.290933000 -9956489243835181521820789156201073.876 +1549469446 214808438 hsn lwr kfq 1987-09-12 02:56:57.488553000 1939-03-04 17:50:10.430653000 9868976151081756801211311450600682.005 +-1408018754 1646595280 oju sny fau 1942-09-01 14:54:52.499616000 1939-01-24 15:34:19.169731000 21231842013993723969537144214988987.039 +-1327408244 -1216257492 jey nid kfa 1964-08-10 23:05:00.988599000 1963-06-26 10:52:34.771336000 4242711977869849798511047402540965.335 +117944752 -1216912862 toj xsn grm 1963-05-05 02:58:13.968956000 1987-11-29 12:21:52.464881000 -18472310551506984966299324551044506.428 +1455358314 215070586 kvq oau bvh 1991-05-29 09:26:34.533845000 1987-10-21 10:06:01.203959000 14147818571022030739531096869639996.749 +1535968824 214677364 fal jep gbm 1976-10-24 17:38:18.415981000 1939-02-19 17:04:53.343679000 18426336467456810534965913021677488.261 +-1313776548 1646464206 pkv toa sni 1938-12-29 09:10:32.541299000 1963-07-22 12:23:08.945284000 16994538602756700320493424972183045.999 +23833620 -1216650714 wrm bvq gbv 1967-01-19 09:27:51.014248000 1963-06-13 10:07:17.684362000 -14193468131566711027979539132005191.684 +104444130 -1217043936 bmh fql cni 1952-06-16 17:39:34.896384000 1987-11-16 11:36:35.377907000 -9914950235131931232544722979967700.172 +1549600520 214939512 lwr pbv oju 1987-09-25 03:42:14.575527000 1939-03-17 18:35:27.517627000 \N +-1407887680 1646726354 sny wrd cwi 1942-09-14 15:40:09.586590000 1939-02-06 16:19:36.256705000 9910515159785007090487377626834055.709 +-1327277170 1646333132 nid rmh oje 1964-08-23 23:50:18.075573000 1963-07-09 11:37:51.858310000 5733090138320292352297426736847.527 +118075826 -1216781788 xsn cwr kvq 1963-05-18 03:43:31.055930000 1987-12-12 13:07:09.551855000 4284250986573100087787113578774339.039 +1455489388 215201660 oau sey xje 1991-06-11 10:11:51.620819000 1987-11-03 10:51:18.290933000 -18430771542803734677023258374811132.724 +1536099898 214808438 jep nit kfq 1976-11-06 18:23:35.502955000 1939-03-04 17:50:10.430653000 14189357579725281028807163045873370.453 +-1313645474 1646595280 toa xse wrm 1939-01-11 09:55:49.628273000 1938-12-29 14:03:44.995783000 18467875476160060824241979197910861.965 +23964694 -1216519640 bvq fau kfa 1967-02-01 10:13:08.101222000 1963-06-26 10:52:34.771336000 17036077611459950609769491148416419.703 +104575204 -1216912862 fql jup grm 1952-06-29 18:24:51.983358000 1987-11-29 12:21:52.464881000 -14151929122863460738703472955771817.98 +1549731594 215070586 pbv tfa sny 1987-10-08 04:27:31.662501000 1987-09-25 08:35:27.030011000 -9873411226428680943268656803734326.468 +-1407756606 1646857428 wrd bvh gbm 1942-09-27 16:25:26.673564000 1939-02-19 17:04:53.343679000 9952054168488257379763443803067429.413 
+-1327146096 1646464206 rmh vql sni 1964-09-06 00:35:35.162547000 1963-07-22 12:23:08.945284000 47272098841570581628363602970221.231 +118206900 -1216650714 cwr gbv oau \N 1963-05-18 08:36:43.510414000 4325789995276350377063179755007712.743 +1455620462 215332734 sey wid cni 1963-05-31 04:28:48.142904000 1987-11-16 11:36:35.377907000 -18389232534100484387747192198577759.02 +1536230972 214939512 nit rmx oju 1991-06-24 10:57:08.707793000 1939-03-17 18:35:27.517627000 14230896588428531318083229222106744.157 +-1313514400 1646726354 xse cwi bvq 1976-11-19 19:08:52.589929000 1939-01-11 14:49:02.082757000 18509414484863311113518045374144235.669 +24095768 -1216388566 fau jey oje 1939-01-24 10:41:06.715247000 1963-07-09 11:37:51.858310000 17077616620163200899045557324649793.407 +104706278 -1216781788 jup nyt kvq 1967-02-14 10:58:25.188196000 1987-12-12 13:07:09.551855000 -14110390114160210449427406779538444.276 +1549862668 215201660 tfa xje wrd 1952-07-12 19:10:09.070331000 1987-10-08 09:20:44.116985000 -9831872217725430653992590627500952.764 +-1407625532 1646988502 bvh fal kfq 1987-10-21 05:12:48.749475000 1939-03-04 17:50:10.430653000 9993593177191507669039509979300803.117 +-1327015022 1646595280 vql aup wrm 1942-10-10 17:10:43.760538000 1938-12-29 14:03:44.995783000 88811107544820870904429779203594.935 +118337974 -1216519640 gbv kfa sey 1964-09-19 01:20:52.249521000 1963-05-31 09:22:00.597388000 4367329003979600666339245931241086.447 +1455751536 215463808 wid bmh grm 1963-06-13 05:14:05.229878000 1987-11-29 12:21:52.464881000 -18347693525397234098471126022344385.316 +1536362046 215070586 rmx vqc sny 1991-07-07 11:42:25.794767000 1987-09-25 08:35:27.030011000 14272435597131781607359295398340117.861 +-1313383326 1646857428 cwi gbm fau 1976-12-02 19:54:09.676903000 1939-01-24 15:34:19.169731000 18550953493566561402794111550377609.373 +24226842 -1216257492 jey nid sni 1939-02-06 11:26:23.802221000 1963-07-22 12:23:08.945284000 17119155628866451188321623500883167.111 +104837352 -1216650714 nyt rdx oau 1967-02-27 11:43:42.275169000 1963-05-18 08:36:43.510414000 -14068851105456960160151340603305070.572 +1549993742 215332734 xje cni bvh 1952-07-25 19:55:26.157305000 1987-10-21 10:06:01.203959000 -9790333209022180364716524451267579.06 +-1407494458 214677364 fal jep oju 1987-11-03 05:58:05.836449000 1939-03-17 18:35:27.517627000 10035132185894757958315576155534176.821 +-1326883948 1646726354 aup eyt bvq 1942-10-23 17:56:00.847512000 1939-01-11 14:49:02.082757000 130350116248071160180495955436968.639 +118469048 -1216388566 kfa oje gbv 1964-10-02 02:06:09.336495000 1963-06-13 10:07:17.684362000 4408868012682850955615312107474460.151 +1455882610 -1217043936 bmh fql kvq 1963-06-26 05:59:22.316852000 1987-12-12 13:07:09.551855000 -18306154516693983809195059846111011.612 +1536493120 215201660 vqc aug wrd 1991-07-20 12:27:42.881741000 1987-10-08 09:20:44.116985000 14313974605835031896635361574573491.565 +-1313252252 1646988502 gbm kfq cwi 1976-12-15 20:39:26.763877000 1939-02-06 16:19:36.256705000 18592492502269811692070177726610983.077 +24357916 1646333132 nid rmh wrm 1939-02-19 12:11:40.889195000 1938-12-29 14:03:44.995783000 17160694637569701477597689677116540.815 +104968426 -1216519640 rdx vhc sey 1967-03-12 12:28:59.362143000 1963-05-31 09:22:00.597388000 -14027312096753709870875274427071696.868 +1550124816 215463808 cni grm xje 1952-08-07 20:40:43.244279000 1987-11-03 10:51:18.290933000 -9748794200318930075440458275034205.356 +-1407363384 214808438 jep nit sny 1987-11-16 06:43:22.923423000 1987-09-25 
08:35:27.030011000 10076671194598008247591642331767550.525 +-1326752874 1646857428 eyt idx fau 1942-11-05 18:41:17.934486000 1939-01-24 15:34:19.169731000 171889124951321449456562131670342.343 +118600122 -1216257492 oje sni kfa 1964-10-15 02:51:26.423469000 1963-06-26 10:52:34.771336000 4450407021386101244891378283707833.855 +1456013684 -1216912862 fql jup oau 1963-07-09 06:44:39.403826000 1963-05-18 08:36:43.510414000 -18264615507990733519918993669877637.908 +1536624194 215332734 aug eyk bvh 1991-08-02 13:12:59.968715000 1987-10-21 10:06:01.203959000 14355513614538282185911427750806865.269 +-1313121178 214677364 kfq oju gbm 1976-12-28 21:24:43.850851000 1939-02-19 17:04:53.343679000 18634031510973061981346243902844356.781 +24488990 1646464206 rmh vql bvq 1939-03-04 12:56:57.976169000 1939-01-11 14:49:02.082757000 17202233646272951766873755853349914.519 +105099500 -1216388566 vhc alg gbv 1967-03-25 13:14:16.449117000 1963-06-13 10:07:17.684362000 -13985773088050459581599208250838323.164 +1550255890 -1217043936 grm kvq cni 1952-08-20 21:26:00.331253000 1987-11-16 11:36:35.377907000 -9707255191615679786164392098800831.652 +-1407232310 214939512 nit rmx wrd 1987-11-29 07:28:40.010397000 1987-10-08 09:20:44.116985000 10118210203301258536867708508000924.229 +-1326621800 1646988502 idx mhc cwi 1942-11-18 19:26:35.021460000 1939-02-06 16:19:36.256705000 213428133654571738732628307903716.047 +118731196 1646333132 sni wrm oje 1964-10-28 03:36:43.510443000 1963-07-09 11:37:51.858310000 4491946030089351534167444459941207.559 +1456144758 -1216781788 jup nyt sey 1963-07-22 07:29:56.490800000 1963-05-31 09:22:00.597388000 -18223076499287483230642927493644264.204 +1536755268 215463808 eyk ido xje 1991-08-15 13:58:17.055689000 1987-11-03 10:51:18.290933000 14397052623241532475187493927040238.973 +-1312990104 214808438 oju sny kfq 1977-01-10 22:10:00.937825000 1939-03-04 17:50:10.430653000 18675570519676312270622310079077730.485 +24620064 1646595280 vql aup fau 1939-03-17 13:42:15.063143000 1939-01-24 15:34:19.169731000 17243772654976202056149822029583288.223 +105230574 -1216257492 alg epk kfa 1967-04-07 13:59:33.536091000 1963-06-26 10:52:34.771336000 -13944234079347209292323142074604949.46 +1550386964 -1216912862 kvq oau grm 1952-09-02 22:11:17.418227000 1987-11-29 12:21:52.464881000 -9665716182912429496888325922567457.948 +-1407101236 215070586 rmx vqc bvh 1987-12-12 08:13:57.097371000 1987-10-21 10:06:01.203959000 10159749212004508826143774684234297.933 +-1326490726 214677364 mhc qlg gbm 1942-12-01 20:11:52.108434000 1939-02-19 17:04:53.343679000 254967142357822028008694484137089.751 +118862270 1646464206 wrm bvq sni 1964-11-10 04:22:00.597417000 1963-07-22 12:23:08.945284000 4533485038792601823443510636174581.263 +1456275832 -1216650714 nyt rdx gbv 1963-08-04 08:15:13.577774000 1963-06-13 10:07:17.684362000 -18181537490584232941366861317410890.5 +1536886342 -1217043936 ido mhs cni 1991-08-28 14:43:34.142663000 1987-11-16 11:36:35.377907000 14438591631944782764463560103273612.677 +-1312859030 214939512 sny wrd oju 1977-01-23 22:55:18.024798000 1939-03-17 18:35:27.517627000 18717109528379562559898376255311104.189 +24751138 1646726354 aup eyt cwi 1939-03-30 14:27:32.150116000 1939-02-06 16:19:36.256705000 17285311663679452345425888205816661.927 +105361648 1646333132 epk ito oje 1967-04-20 14:44:50.623065000 1963-07-09 11:37:51.858310000 -13902695070643959003047075898371575.756 +1550518038 -1216781788 oau sey kvq 1952-09-15 22:56:34.505201000 1987-12-12 13:07:09.551855000 
-9624177174209179207612259746334084.244 +-1406970162 215201660 vqc aug xje 1987-12-25 08:59:14.184345000 1987-11-03 10:51:18.290933000 10201288220707759115419840860467671.637 +-1326359652 214808438 qlg upk kfq 1942-12-14 20:57:09.195408000 1939-03-04 17:50:10.430653000 296506151061072317284760660370463.455 +118993344 1646595280 bvq fau wrm 1964-11-23 05:07:17.684391000 1938-12-29 14:03:44.995783000 4575024047495852112719576812407954.967 +1456406906 -1216519640 rdx vhc kfa 1963-08-17 09:00:30.664748000 1963-06-26 10:52:34.771336000 -18139998481880982652090795141177516.796 +1537017416 -1216912862 mhs qlw grm 1991-09-10 15:28:51.229636000 1987-11-29 12:21:52.464881000 14480130640648033053739626279506986.381 +-1312727956 215070586 wrd bvh sny 1977-02-05 23:40:35.111772000 1987-09-25 08:35:27.030011000 18758648537082812849174442431544477.893 +24882212 1646857428 eyt idx gbm 1939-04-12 15:12:49.237090000 1939-02-19 17:04:53.343679000 17326850672382702634701954382050035.631 +105492722 1646464206 ito mxs sni 1967-05-03 15:30:07.710039000 1963-07-22 12:23:08.945284000 -13861156061940708713771009722138202.052 +1550649112 -1216650714 sey wid oau 1952-09-28 23:41:51.592175000 1963-05-18 08:36:43.510414000 -9582638165505928918336193570100710.54 +-1406839088 215332734 aug eyk cni 1988-01-07 09:44:31.271319000 1987-11-16 11:36:35.377907000 10242827229411009404695907036701045.341 +-1326228578 214939512 upk yto oju 1942-12-27 21:42:26.282382000 1939-03-17 18:35:27.517627000 338045159764322606560826836603837.159 +119124418 1646726354 fau jey bvq 1964-12-06 05:52:34.771365000 1939-01-11 14:49:02.082757000 4616563056199102401995642988641328.671 +1456537980 -1216388566 vhc alg oje 1963-08-30 09:45:47.751722000 1963-07-09 11:37:51.858310000 -18098459473177732362814728964944143.092 +1537148490 -1216781788 qlw upb kvq 1991-09-23 16:14:08.316610000 1987-12-12 13:07:09.551855000 14521669649351283343015692455740360.085 +-1312596882 215201660 bvh fal wrd 1977-02-19 00:25:52.198746000 1987-10-08 09:20:44.116985000 18800187545786063138450508607777851.597 +25013286 1646988502 idx mhc kfq 1939-04-25 15:58:06.324064000 1939-03-04 17:50:10.430653000 17368389681085952923978020558283409.335 +105623796 1646595280 mxs qcw wrm 1967-05-16 16:15:24.797013000 1938-12-29 14:03:44.995783000 -13819617053237458424494943545904828.348 +1550780186 -1216519640 wid bmh sey 1952-10-12 00:27:08.679149000 1963-05-31 09:22:00.597388000 -9541099156802678629060127393867336.836 +-1406708014 215463808 eyk ido grm 1988-01-20 10:29:48.358293000 1987-11-29 12:21:52.464881000 10284366238114259693971973212934419.045 +-1326097504 215070586 yto dxs sny 1943-01-09 22:27:43.369356000 1987-09-25 08:35:27.030011000 379584168467572895836893012837210.863 +119255492 1646857428 jey nid fau 1964-12-19 06:37:51.858339000 1939-01-24 15:34:19.169731000 4658102064902352691271709164874702.375 +1456669054 -1216257492 alg epk sni 1963-09-12 10:31:04.838696000 1963-07-22 12:23:08.945284000 -18056920464474482073538662788710769.388 +1537279564 -1216650714 upb ytf oau 1991-10-06 16:59:25.403584000 1963-05-18 08:36:43.510414000 14563208658054533632291758631973733.789 +-1312465808 215332734 fal jep bvh 1977-03-04 01:11:09.285720000 1987-10-21 10:06:01.203959000 18841726554489313427726574784011225.301 +25144360 214677364 mhc qlg oju 1939-05-08 16:43:23.411038000 1939-03-17 18:35:27.517627000 17409928689789203213254086734516783.039 +105754870 1646726354 qcw ugb bvq 1967-05-29 17:00:41.883987000 1939-01-11 14:49:02.082757000 -13778078044534208135218877369671454.644 +1550911260 
-1216388566 bmh fql gbv 1952-10-25 01:12:25.766123000 1963-06-13 10:07:17.684362000 -9499560148099428339784061217633963.132 +-1406576940 -1217043936 ido mhs kvq 1988-02-02 11:15:05.445267000 1987-12-12 13:07:09.551855000 10325905246817509983248039389167792.749 +-1325966430 215201660 dxs hcw wrd 1943-01-22 23:13:00.456330000 1987-10-08 09:20:44.116985000 421123177170823185112959189070584.567 +119386566 1646988502 nid rmh cwi 1965-01-01 07:23:08.945313000 1939-02-06 16:19:36.256705000 4699641073605602980547775341108076.079 +1456800128 1646333132 epk ito wrm 1963-09-25 11:16:21.925670000 1938-12-29 14:03:44.995783000 -18015381455771231784262596612477395.684 +1537410638 -1216519640 ytf dxj sey 1991-10-19 17:44:42.490558000 1963-05-31 09:22:00.597388000 14604747666757783921567824808207107.493 +-1312334734 215463808 jep nit xje 1977-03-17 01:56:26.372694000 1987-11-03 10:51:18.290933000 18883265563192563717002640960244599.005 +25275434 214808438 qlg upk sny 1939-05-21 17:28:40.498012000 1987-09-25 08:35:27.030011000 17451467698492453502530152910750156.743 +105885944 1646857428 ugb ykf fau 1967-06-11 17:45:58.970961000 1939-01-24 15:34:19.169731000 -13736539035830957845942811193438080.94 +1551042334 -1216257492 fql jup kfa 1952-11-07 01:57:42.853097000 1963-06-26 10:52:34.771336000 -9458021139396178050507995041400589.428 +-1406445866 -1216912862 mhs qlw oau 1988-02-15 12:00:22.532241000 1963-05-18 08:36:43.510414000 10367444255520760272524105565401166.453 +-1325835356 215332734 hcw lgb bvh 1943-02-04 23:58:17.543304000 1987-10-21 10:06:01.203959000 462662185874073474389025365303958.271 +119517640 214677364 rmh vql gbm 1965-01-14 08:08:26.032287000 1939-02-19 17:04:53.343679000 4741180082308853269823841517341449.783 +1456931202 1646464206 ito mxs bvq 1963-10-08 12:01:39.012644000 1939-01-11 14:49:02.082757000 -17973842447067981494986530436244021.98 +1537541712 -1216388566 dxj hcn gbv 1991-11-01 18:29:59.577532000 1963-06-13 10:07:17.684362000 14646286675461034210843890984440481.197 +-1312203660 -1217043936 nit rmx cni 1977-03-30 02:41:43.459668000 1987-11-16 11:36:35.377907000 18924804571895814006278707136477972.709 +25406508 214939512 upk yto wrd 1939-06-03 18:13:57.584986000 1987-10-08 09:20:44.116985000 17493006707195703791806219086983530.447 +106017018 1646988502 ykf doj cwi 1967-06-24 18:31:16.057935000 1939-02-06 16:19:36.256705000 -13695000027127707556666745017204707.236 +1551173408 1646333132 jup nyt oje 1952-11-20 02:42:59.940071000 1963-07-09 11:37:51.858310000 -9416482130692927761231928865167215.724 +-1406314792 -1216781788 qlw upb sey 1988-02-28 12:45:39.619215000 1963-05-31 09:22:00.597388000 10408983264224010561800171741634540.157 +-1325704282 215463808 lgb pkf xje 1943-02-18 00:43:34.630278000 1987-11-03 10:51:18.290933000 504201194577323763665091541537331.975 +\N 214808438 vql aup kfq 1965-01-27 08:53:43.119261000 1939-03-04 17:50:10.430653000 4782719091012103559099907693574823.487 +119648714 1646595280 mxs qcw fau 1963-10-21 12:46:56.099618000 1939-01-24 15:34:19.169731000 -17932303438364731205710464260010648.276 +1457062276 -1216257492 hcn lgr kfa 1991-11-14 19:15:16.664506000 1963-06-26 10:52:34.771336000 14687825684164284500119957160673854.901 +1537672786 -1216912862 rmx vqc grm 1977-04-12 03:27:00.546642000 1987-11-29 12:21:52.464881000 18966343580599064295554773312711346.413 +-1312072586 215070586 yto dxs bvh 1939-06-16 18:59:14.671960000 1987-10-21 10:06:01.203959000 17534545715898954081082285263216904.151 +25537582 214677364 doj \N gbm 1967-07-07 19:16:33.144909000 
1939-02-19 17:04:53.343679000 -13653461018424457267390678840971333.532 +106148092 1646464206 nyt hsn sni 1952-12-03 03:28:17.027045000 1963-07-22 12:23:08.945284000 -9374943121989677471955862688933842.02 +1551304482 -1216650714 upb rdx gbv 1988-03-12 13:30:56.706189000 1963-06-13 10:07:17.684362000 10450522272927260851076237917867913.861 +-1406183718 -1217043936 pkf ytf cni 1943-03-03 01:28:51.717252000 1987-11-16 11:36:35.377907000 545740203280574052941157717770705.679 +-1325573208 214939512 aup toj oju 1965-02-09 09:39:00.206235000 1939-03-17 18:35:27.517627000 4824258099715353848375973869808197.191 +119779788 1646726354 qcw eyt cwi 1963-11-03 13:32:13.186592000 1939-02-06 16:19:36.256705000 -17890764429661480916434398083777274.572 +1457193350 1646333132 lgr ugb oje 1991-11-27 20:00:33.751480000 1963-07-09 11:37:51.858310000 14729364692867534789396023336907228.605 +1537803860 -1216781788 vqc pkv kvq 1977-04-25 04:12:17.633616000 1987-12-12 13:07:09.551855000 19007882589302314584830839488944720.117 +-1311941512 215201660 dxs aug xje 1939-06-29 19:44:31.758934000 1987-11-03 10:51:18.290933000 17576084724602204370358351439450277.855 +25668656 214808438 hsn hcw kfq 1967-07-20 20:01:50.231883000 1939-03-04 17:50:10.430653000 -13611922009721206978114612664737959.828 +106279166 1646595280 rdx lwr wrm 1952-12-16 04:13:34.114019000 1938-12-29 14:03:44.995783000 -9333404113286427182679796512700468.316 +1551435556 -1216519640 ytf vhc kfa 1988-03-25 14:16:13.793163000 1963-06-26 10:52:34.771336000 10492061281630511140352304094101287.565 +-1406052644 -1216912862 toj dxj grm 1943-03-16 02:14:08.804226000 1987-11-29 12:21:52.464881000 587279211983824342217223894004079.383 +-1325442134 215070586 eyt xsn sny 1965-02-22 10:24:17.293209000 1987-09-25 08:35:27.030011000 4865797108418604137652040046041570.895 +119910862 1646857428 ugb idx gbm 1963-11-16 14:17:30.273566000 1939-02-19 17:04:53.343679000 -17849225420958230627158331907543900.868 +1457324424 1646464206 pkv ykf sni 1991-12-10 20:45:50.838454000 1963-07-22 12:23:08.945284000 14770903701570785078672089513140602.309 +1537934934 -1216650714 aug toa oau 1977-05-08 04:57:34.720590000 1963-05-18 08:36:43.510414000 19049421598005564874106905665178093.821 +-1311810438 215332734 hcw eyk cni 1939-07-12 20:29:48.845908000 1987-11-16 11:36:35.377907000 17617623733305454659634417615683651.559 +25799730 214939512 lwr lgb oju 1967-08-02 20:47:07.318857000 1939-03-17 18:35:27.517627000 -13570383001017956688838546488504586.124 +106410240 1646726354 vhc pbv bvq 1952-12-29 04:58:51.200993000 1939-01-11 14:49:02.082757000 -9291865104583176893403730336467094.612 +1551566630 -1216388566 dxj alg oje 1988-04-07 15:01:30.880137000 1963-07-09 11:37:51.858310000 10533600290333761429628370270334661.269 +-1405921570 -1216781788 xsn hcn kvq 1943-03-29 02:59:25.891200000 1987-12-12 13:07:09.551855000 628818220687074631493290070237453.087 +-1325311060 215201660 idx cwr wrd 1965-03-07 11:09:34.380183000 1987-10-08 09:20:44.116985000 4907336117121854426928106222274944.599 +120041936 1646988502 ykf mhc kfq 1963-11-29 15:02:47.360539000 1939-03-04 17:50:10.430653000 -17807686412254980337882265731310527.164 +1457455498 1646595280 toa doj wrm 1991-12-23 21:31:07.925428000 1938-12-29 14:03:44.995783000 14812442710274035367948155689373976.013 +1538066008 -1216519640 eyk xse sey 1977-05-21 05:42:51.807564000 1963-05-31 09:22:00.597388000 19090960606708815163382971841411467.525 +-1311679364 215463808 lgb ido grm 1939-07-25 21:15:05.932882000 1987-11-29 12:21:52.464881000 
17659162742008704948910483791917025.263 +25930804 215070586 pbv pkf \N 1967-08-15 21:32:24.405831000 1987-09-25 08:35:27.030011000 -13528843992314706399562480312271212.42 +106541314 1646857428 alg tfa sny 1953-01-11 05:44:08.287967000 1939-01-24 15:34:19.169731000 -9250326095879926604127664160233720.908 +1551697704 -1216257492 hcn epk fau 1988-04-20 15:46:47.967111000 1963-07-22 12:23:08.945284000 10575139299037011718904436446568034.973 +-1405790496 -1216650714 cwr lgr sni 1943-04-11 03:44:42.978174000 1963-05-18 08:36:43.510414000 670357229390324920769356246470826.791 +-1325179986 215332734 mhc gbv oau 1965-03-20 11:54:51.467156000 1987-10-21 10:06:01.203959000 4948875125825104716204172398508318.303 +120173010 214677364 doj qlg bvh 1963-12-12 15:48:04.447513000 1939-03-17 18:35:27.517627000 -17766147403551730048606199555077153.46 +1457586572 1646726354 xse hsn oju 1992-01-05 22:16:25.012402000 1939-01-11 14:49:02.082757000 14853981718977285657224221865607349.717 +1538197082 -1216388566 ido cwi bvq 1977-06-03 06:28:08.894538000 1963-06-13 10:07:17.684362000 19132499615412065452659038017644841.229 +-1311548290 -1217043936 pkf mhs gbv 1939-08-07 22:00:23.019856000 1987-12-12 13:07:09.551855000 17700701750711955238186549968150398.967 +26061878 215201660 tfa toj kvq 1967-08-28 22:17:41.492805000 1987-10-08 09:20:44.116985000 -13487304983611456110286414136037838.716 +106672388 1646988502 epk xje wrd 1953-01-24 06:29:25.374941000 1939-02-06 16:19:36.256705000 -9208787087176676314851597984000347.204 +1551828778 1646333132 lgr ito cwi 1988-05-03 16:32:05.054085000 1938-12-29 14:03:44.995783000 10616678307740262008180502622801408.677 +-1405659422 -1216519640 gbv pkv wrm 1943-04-24 04:30:00.065147000 1963-05-31 09:22:00.597388000 711896238093575210045422422704200.495 +-1325048912 215463808 qlg kfa sey 1965-04-02 12:40:08.554130000 1987-11-03 10:51:18.290933000 4990414134528355005480238574741692.007 +120304084 214808438 hsn upk xje 1963-12-25 16:33:21.534487000 1987-09-25 08:35:27.030011000 -17724608394848479759330133378843779.756 +1457717646 1646857428 cwi lwr sny 1992-01-18 23:01:42.099376000 1939-01-24 15:34:19.169731000 14895520727680535946500288041840723.421 +1538328156 -1216257492 mhs gbm fau 1977-06-16 07:13:25.981512000 1963-06-26 10:52:34.771336000 19174038624115315741935104193878214.933 +-1311417216 -1216912862 toj qlw kfa 1939-08-20 22:45:40.106830000 1963-05-18 08:36:43.510414000 17742240759415205527462616144383772.671 +26192952 215332734 xje xsn oau 1967-09-10 23:02:58.579779000 1987-10-21 10:06:01.203959000 -13445765974908205821010347959804465.012 +106803462 214677364 ito cni bvh 1953-02-06 07:14:42.461915000 1939-02-19 17:04:53.343679000 -9167248078473426025575531807766973.5 +1551959852 1646464206 pkv mxs gbm 1988-05-16 17:17:22.141059000 1939-01-11 14:49:02.082757000 10658217316443512297456568799034782.381 +-1405528348 -1216388566 kfa toa bvq 1943-05-07 05:15:17.152121000 1963-06-13 10:07:17.684362000 753435246796825499321488598937574.199 +-1324917838 -1217043936 upk oje gbv 1965-04-15 13:25:25.641104000 1987-11-16 11:36:35.377907000 5031953143231605294756304750975065.711 +120435158 214939512 lwr yto cni 1964-01-07 17:18:38.621461000 1987-10-08 09:20:44.116985000 -17683069386145229470054067202610406.052 +1457848720 1646988502 gbm pbv wrd 1992-01-31 23:46:59.186350000 1939-02-06 16:19:36.256705000 14937059736383786235776354218074097.125 +1538459230 1646333132 qlw kfq cwi 1977-06-29 07:58:43.068486000 1963-07-09 11:37:51.858310000 19215577632818566031211170370111588.637 +-1311286142 
-1216781788 xsn upb oje 1939-09-02 23:30:57.193804000 1963-05-31 09:22:00.597388000 17783779768118455816738682320617146.375 +26324026 215463808 cni cwr sey 1967-09-23 23:48:15.666753000 1987-11-03 10:51:18.290933000 -13404226966204955531734281783571091.308 +106934536 214808438 mxs grm xje 1953-02-19 07:59:59.548889000 1939-03-04 17:50:10.430653000 -9125709069770175736299465631533599.796 +1552090926 1646595280 toa qcw kfq 1988-05-29 18:02:39.228033000 1939-01-24 15:34:19.169731000 10699756325146762586732634975268156.085 +-1405397274 -1216257492 oje xse fau 1943-05-20 06:00:34.239095000 1963-06-26 10:52:34.771336000 794974255500075788597554775170947.903 +-1324786764 -1216912862 yto sni kfa 1965-04-28 14:10:42.728078000 1987-11-29 12:21:52.464881000 5073492151934855584032370927208439.415 +120566232 215070586 pbv dxs grm 1964-01-20 18:03:55.708435000 1987-10-21 10:06:01.203959000 -17641530377441979180778001026377032.348 +1457979794 214677364 kfq tfa bvh 1992-02-14 00:32:16.273324000 1939-02-19 17:04:53.343679000 14978598745087036525052420394307470.829 +1538590304 1646464206 upb oju gbm 1977-07-12 08:44:00.155460000 1963-07-22 12:23:08.945284000 19257116641521816320487236546344962.341 +-1311155068 -1216650714 cwr ytf sni 1939-09-16 00:16:14.280778000 1963-06-13 10:07:17.684362000 17825318776821706106014748496850520.079 +26455100 -1217043936 grm gbv gbv 1967-10-07 00:33:32.753727000 1987-11-16 11:36:35.377907000 -13362687957501705242458215607337717.604 +107065610 214939512 qcw kvq cni 1953-03-04 08:45:16.635863000 1939-03-17 18:35:27.517627000 -9084170061066925447023399455300226.092 +1552222000 1646726354 xse ugb oju 1988-06-11 18:47:56.315006000 1939-02-06 16:19:36.256705000 10741295333850012876008701151501529.789 +-1405266200 1646333132 sni cwi cwi 1943-06-02 06:45:51.326069000 1963-07-09 11:37:51.858310000 836513264203326077873620951404321.607 +-1324655690 \N dxs wrm oje 1965-05-11 14:55:59.815052000 1987-12-12 13:07:09.551855000 5115031160638105873308437103441813.119 +120697306 -1216781788 tfa hcw kvq 1964-02-02 18:49:12.795409000 1987-11-03 10:51:18.290933000 -17599991368738728891501934850143658.644 +1458110868 215201660 oju xje xje 1992-02-27 01:17:33.360298000 1939-03-04 17:50:10.430653000 15020137753790286814328486570540844.533 +1538721378 214808438 ytf sny kfq 1977-07-25 09:29:17.242434000 1938-12-29 14:03:44.995783000 19298655650225066609763302722578336.045 +-1311023994 1646595280 gbv dxj wrm 1939-09-29 01:01:31.367752000 1963-06-26 10:52:34.771336000 17866857785524956395290814673083893.783 +26586174 -1216519640 kvq kfa kfa 1967-10-20 01:18:49.840701000 1987-11-29 12:21:52.464881000 -13321148948798454953182149431104343.9 +107196684 -1216912862 ugb oau grm 1953-03-17 09:30:33.722837000 1987-09-25 08:35:27.030011000 -9042631052363675157747333279066852.388 +1552353074 215070586 cwi ykf sny 1988-06-24 19:33:13.401980000 1939-02-19 17:04:53.343679000 10782834342553263165284767327734903.493 +-1405135126 1646857428 wrm gbm gbm 1943-06-15 07:31:08.413043000 1963-07-22 12:23:08.945284000 878052272906576367149687127637695.311 +-1324524616 1646464206 hcw bvq sni 1965-05-24 15:41:16.902026000 1963-05-18 08:36:43.510414000 5156570169341356162584503279675186.823 +120828380 -1216650714 xje lgb oau 1964-02-15 19:34:29.882383000 1987-11-16 11:36:35.377907000 -17558452360035478602225868673910284.94 +1458241942 215332734 sny cni cni 1992-03-11 02:02:50.447272000 1939-03-17 18:35:27.517627000 15061676762493537103604552746774218.237 +1538852452 214939512 dxj wrd oju 1977-08-07 10:14:34.329408000 1939-01-11 
14:49:02.082757000 19340194658928316899039368898811709.749 +-1310892920 1646726354 kfa hcn bvq 1939-10-12 01:46:48.454726000 1963-07-09 11:37:51.858310000 17908396794228206684566880849317267.487 +26717248 -1216388566 oau oje oje 1967-11-02 02:04:06.927675000 1987-12-12 13:07:09.551855000 -13279609940095204663906083254870970.196 +107327758 -1216781788 ykf sey kvq 1953-03-30 10:15:50.809811000 1987-10-08 09:20:44.116985000 -9001092043660424868471267102833478.684 +1552484148 215201660 gbm doj wrd 1988-07-07 20:18:30.488954000 1939-03-04 17:50:10.430653000 10824373351256513454560833503968277.197 +-1405004052 1646988502 bvq kfq kfq 1943-06-28 08:16:25.500017000 1938-12-29 14:03:44.995783000 919591281609826656425753303871069.015 +-1324393542 1646595280 lgb fau wrm 1965-06-06 16:26:33.989000000 1963-05-31 09:22:00.597388000 5198109178044606451860569455908560.527 +120959454 -1216519640 cni pkf sey 1964-02-28 20:19:46.969357000 1987-11-29 12:21:52.464881000 -17516913351332228312949802497676911.236 +1458373016 215463808 wrd grm grm 1992-03-24 02:48:07.534246000 1987-09-25 08:35:27.030011000 15103215771196787392880618923007591.941 +1538983526 215070586 hcn bvh sny 1977-08-20 10:59:51.416382000 1939-01-24 15:34:19.169731000 19381733667631567188315435075045083.453 +-1310761846 1646857428 oje lgr fau 1939-10-25 02:32:05.541700000 1963-07-22 12:23:08.945284000 17949935802931456973842947025550641.191 +26848322 -1216257492 sey sni sni 1967-11-15 02:49:24.014649000 1963-05-18 08:36:43.510414000 -13238070931391954374630017078637596.492 +107458832 -1216650714 doj wid oau 1953-04-12 11:01:07.896785000 1987-10-21 10:06:01.203959000 -8959553034957174579195200926600104.98 +1552615222 215332734 kfq hsn bvh 1988-07-20 21:03:47.575928000 1939-03-17 18:35:27.517627000 10865912359959763743836899680201650.901 +-1404872978 214677364 fau oju oju 1943-07-11 09:01:42.586991000 1939-01-11 14:49:02.082757000 961130290313076945701819480104442.719 +-1324262468 1646726354 pkf jey bvq 1965-06-19 17:11:51.075974000 1963-06-13 10:07:17.684362000 5239648186747856741136635632141934.231 +121090528 -1216388566 grm toj gbv 1964-03-12 21:05:04.056331000 1987-12-12 13:07:09.551855000 -17475374342628978023673736321443537.532 +1458504090 -1217043936 bvh kvq kvq 1992-04-06 03:33:24.621220000 1987-10-08 09:20:44.116985000 15144754779900037682156685099240965.645 +1539114600 215201660 lgr fal wrd 1977-09-02 11:45:08.503356000 1939-02-06 16:19:36.256705000 19423272676334817477591501251278457.157 +-1310630772 1646988502 sni pkv cwi 1939-11-07 03:17:22.628674000 1938-12-29 14:03:44.995783000 17991474811634707263119013201784014.895 +26979396 1646333132 wid wrm wrm 1967-11-28 03:34:41.101623000 1963-05-31 09:22:00.597388000 -13196531922688704085353950902404222.788 +107589906 -1216519640 hsn bmh sey 1953-04-25 11:46:24.983759000 1987-11-03 10:51:18.290933000 -8918014026253924289919134750366731.276 +1552746296 215463808 oju lwr xje 1988-08-02 21:49:04.662902000 1987-09-25 08:35:27.030011000 10907451368663014033112965856435024.605 +-1404741904 214808438 jey sny sny 1943-07-24 09:46:59.673965000 1939-01-24 15:34:19.169731000 1002669299016327234977885656337816.423 +-1324131394 1646857428 toj nid fau 1965-07-02 17:57:08.162948000 1963-06-26 10:52:34.771336000 5281187195451107030412701808375307.935 +121221602 -1216257492 kvq xsn kfa 1964-03-25 21:50:21.143305000 1963-05-18 08:36:43.510414000 -17433835333925727734397670145210163.828 +1458635164 -1216912862 fal oau oau 1992-04-19 04:18:41.708194000 1987-10-21 10:06:01.203959000 
15186293788603287971432751275474339.349 +1539245674 215332734 pkv jep bvh 1977-09-15 12:30:25.590330000 1939-02-19 17:04:53.343679000 19464811685038067766867567427511830.861 +-1310499698 214677364 wrm toa gbm 1939-11-20 04:02:39.715648000 1939-01-11 14:49:02.082757000 18033013820337957552395079378017388.599 +27110470 1646464206 bmh bvq bvq 1967-12-11 04:19:58.188597000 1963-06-13 10:07:17.684362000 -13154992913985453796077884726170849.084 +107720980 -1216388566 lwr fql gbv 1953-05-08 12:31:42.070732000 1987-11-16 11:36:35.377907000 -8876475017550674000643068574133357.572 +1552877370 -1217043936 sny pbv cni 1988-08-15 22:34:21.749876000 1987-10-08 09:20:44.116985000 10948990377366264322389032032668398.309 +-1404610830 214939512 nid wrd wrd 1943-08-06 10:32:16.760939000 1939-02-06 16:19:36.256705000 1044208307719577524253951832571190.127 +-1324000320 1646988502 xsn rmh cwi 1965-07-15 18:42:25.249922000 1963-07-09 11:37:51.858310000 5322726204154357319688767984608681.639 +121352676 1646333132 oau cwr oje 1964-04-07 22:35:38.230279000 1963-05-31 09:22:00.597388000 -17392296325222477445121603968976790.124 +1458766238 -1216781788 jep sey sey 1992-05-02 05:03:58.795168000 1987-11-03 10:51:18.290933000 15227832797306538260708817451707713.053 +1539376748 215463808 toa nit xje 1977-09-28 13:15:42.677304000 1939-03-04 17:50:10.430653000 19506350693741318056143633603745204.565 +-1310368624 214808438 bvq xse kfq 1939-12-03 04:47:56.802622000 1939-01-24 15:34:19.169731000 18074552829041207841671145554250762.303 +27241544 1646595280 fql fau fau 1967-12-24 05:05:15.275570000 1963-06-26 10:52:34.771336000 -13113453905282203506801818549937475.38 +107852054 -1216257492 pbv jup kfa 1953-05-21 13:16:59.157706000 1987-11-29 12:21:52.464881000 -8834936008847423711367002397899983.868 +1553008444 -1216912862 wrd tfa grm 1988-08-28 23:19:38.836850000 1987-10-21 10:06:01.203959000 10990529386069514611665098208901772.013 +-1404479756 215070586 rmh bvh bvh 1943-08-19 11:17:33.847913000 1939-02-19 17:04:53.343679000 1085747316422827813530018008804563.831 +-1323869246 214677364 cwr vql gbm 1965-07-28 19:27:42.336896000 1963-07-22 12:23:08.945284000 5364265212857607608964834160842055.343 +121483750 1646464206 sey gbv sni 1964-04-20 23:20:55.317253000 1963-06-13 10:07:17.684362000 -17350757316519227155845537792743416.42 +1458897312 -1216650714 nit wid gbv 1992-05-15 05:49:15.882142000 1987-11-16 11:36:35.377907000 15269371806009788549984883627941086.757 +1539507822 -1217043936 xse rmx cni 1977-10-11 14:00:59.764278000 1939-03-17 18:35:27.517627000 19547889702444568345419699779978578.269 +-1310237550 214939512 fau cwi oju 1939-12-16 05:33:13.889596000 1939-02-06 16:19:36.256705000 18116091837744458130947211730484136.007 +27372618 1646726354 jup jey cwi 1968-01-06 05:50:32.362544000 1963-07-09 11:37:51.858310000 -13071914896578953217525752373704101.676 +107983128 1646333132 tfa nyt oje 1953-06-03 14:02:16.244680000 1987-12-12 13:07:09.551855000 -8793397000144173422090936221666610.164 +1553139518 -1216781788 bvh xje kvq 1988-09-11 00:04:55.923824000 1987-11-03 10:51:18.290933000 11032068394772764900941164385135145.717 +-1404348682 215201660 vql fal xje 1943-09-01 12:02:50.934887000 1939-03-04 17:50:10.430653000 1127286325126078102806084185037937.535 +-1323738172 214808438 gbv aup kfq 1965-08-10 20:12:59.423870000 1938-12-29 14:03:44.995783000 5405804221560857898240900337075429.047 +121614824 1646595280 wid kfa wrm 1964-05-04 00:06:12.404227000 1963-06-26 10:52:34.771336000 -17309218307815976866569471616510042.716 +1459028386 
-1216519640 rmx bmh kfa 1992-05-28 06:34:32.969116000 1987-11-29 12:21:52.464881000 15310910814713038839260949804174460.461 +1539638896 -1216912862 cwi vqc grm 1977-10-24 14:46:16.851252000 1987-09-25 08:35:27.030011000 19589428711147818634695765956211951.973 +-1310106476 215070586 jey gbm sny 1939-12-29 06:18:30.976570000 1939-02-19 17:04:53.343679000 18157630846447708420223277906717509.711 +27503692 1646857428 nyt nid gbm 1968-01-19 06:35:49.449518000 1963-07-22 12:23:08.945284000 -13030375887875702928249686197470727.972 +108114202 1646464206 xje rdx sni 1953-06-16 14:47:33.331654000 1963-05-18 08:36:43.510414000 -8751857991440923132814870045433236.46 +1553270592 -1216650714 fal cni oau 1988-09-24 00:50:13.010798000 1987-11-16 11:36:35.377907000 11073607403476015190217230561368519.421 +-1404217608 215332734 aup jep cni 1943-09-14 12:48:08.021861000 1939-03-17 18:35:27.517627000 1168825333829328392082150361271311.239 +-1323607098 214939512 kfa eyt oju 1965-08-23 20:58:16.510844000 1939-01-11 14:49:02.082757000 5447343230264108187516966513308802.751 +121745898 1646726354 bmh oje bvq 1964-05-17 00:51:29.491201000 1963-07-09 11:37:51.858310000 -17267679299112726577293405440276669.012 +1459159460 -1216388566 vqc fql oje 1992-06-10 07:19:50.056090000 1987-12-12 13:07:09.551855000 15352449823416289128537015980407834.165 +1539769970 -1216781788 gbm aug kvq 1977-11-06 15:31:33.938226000 1987-10-08 09:20:44.116985000 19630967719851068923971832132445325.677 +-1309975402 215201660 nid kfq wrd 1940-01-11 07:03:48.063544000 1939-03-04 17:50:10.430653000 18199169855150958709499344082950883.415 +27634766 1646988502 rdx rmh kfq 1968-02-01 07:21:06.536492000 1938-12-29 14:03:44.995783000 -12988836879172452638973620021237354.268 +108245276 1646595280 cni vhc wrm 1953-06-29 15:32:50.418628000 1963-05-31 09:22:00.597388000 -8710318982737672843538803869199862.756 +1553401666 -1216519640 jep grm sey 1988-10-07 01:35:30.097772000 1987-11-29 12:21:52.464881000 11115146412179265479493296737601893.125 +-1404086534 215463808 eyt nit grm 1943-09-27 13:33:25.108835000 1987-09-25 08:35:27.030011000 1210364342532578681358216537504684.943 +-1323476024 215070586 oje idx sny 1965-09-05 21:43:33.597818000 1939-01-24 15:34:19.169731000 -14457305820759193856084943006068793.4 +121876972 1646857428 fql sni fau 1964-05-30 01:36:46.578175000 1963-07-22 12:23:08.945284000 -17226140290409476288017339264043295.308 +1459290534 -1216257492 aug jup sni 1992-06-23 08:05:07.143064000 1963-05-18 08:36:43.510414000 15393988832119539417813082156641207.869 +1539901044 -1216650714 kfq eyk oau 1977-11-19 16:16:51.025199000 1987-10-21 10:06:01.203959000 -263296658230627690673753078742955.402 +-1309844328 215332734 rmh oju bvh 1940-01-24 07:49:05.150517000 1939-03-17 18:35:27.517627000 18240708863854208998775410259184257.119 +27765840 214677364 vhc vql oju 1968-02-14 08:06:23.623466000 1939-01-11 14:49:02.082757000 -12947297870469202349697553845003980.564 +108376350 1646726354 grm alg bvq 1953-07-12 16:18:07.505602000 1963-06-13 10:07:17.684362000 13935904563474403629658083479999083.81 +1553532740 -1216388566 nit kvq gbv \N 1987-12-12 13:07:09.551855000 11156685420882515768769362913835266.829 +-1403955460 -1217043936 idx rmx kvq 1988-10-20 02:20:47.184746000 1987-10-08 09:20:44.116985000 1251903351235828970634282713738058.647 +-1323344950 215201660 sni mhc wrd 1943-10-10 14:18:42.195809000 1939-02-06 16:19:36.256705000 -14415766812055943566808876829835419.696 +122008046 1646988502 jup wrm cwi 1965-09-18 22:28:50.684792000 1938-12-29 
14:03:44.995783000 -17184601281706225998741273087809921.604 +1459421608 1646333132 eyk nyt wrm 1964-06-12 02:22:03.665149000 1963-05-31 09:22:00.597388000 15435527840822789707089148332874581.573 +1540032118 -1216519640 oju ido sey 1992-07-06 08:50:24.230037000 1987-11-03 10:51:18.290933000 -221757649527377401397686902509581.698 +-1309713254 215463808 vql sny xje 1977-12-02 17:02:08.112173000 1987-09-25 08:35:27.030011000 18282247872557459288051476435417630.823 +27896914 214808438 alg aup sny 1940-02-06 08:34:22.237491000 1939-01-24 15:34:19.169731000 -12905758861765952060421487668770606.86 +108507424 1646857428 kvq epk fau 1968-02-27 08:51:40.710440000 1963-06-26 10:52:34.771336000 13977443572177653918934149656232457.514 +1553663814 -1216257492 rmx oau kfa 1953-07-25 17:03:24.592576000 1963-05-18 08:36:43.510414000 11198224429585766058045429090068640.533 +-1403824386 -1216912862 mhc vqc oau 1988-11-02 03:06:04.271720000 1987-10-21 10:06:01.203959000 1293442359939079259910348889971432.351 +-1323213876 215332734 wrm qlg bvh 1943-10-23 15:03:59.282783000 1939-02-19 17:04:53.343679000 -14374227803352693277532810653602045.992 +122139120 214677364 nyt bvq gbm 1965-10-01 23:14:07.771766000 1939-01-11 14:49:02.082757000 -17143062273002975709465206911576547.9 +1459552682 1646464206 ido rdx bvq 1964-06-25 03:07:20.752123000 1963-06-13 10:07:17.684362000 15477066849526039996365214509107955.277 +1540163192 -1216388566 sny mhs gbv 1992-07-19 09:35:41.317011000 1987-11-16 11:36:35.377907000 -180218640824127112121620726276207.994 +-1309582180 -1217043936 aup wrd cni 1977-12-15 17:47:25.199147000 1987-10-08 09:20:44.116985000 18323786881260709577327542611651004.527 +28027988 214939512 epk eyt wrd 1940-02-19 09:19:39.324465000 1939-02-06 16:19:36.256705000 -12864219853062701771145421492537233.156 +108638498 1646988502 oau ito cwi 1968-03-11 09:36:57.797414000 1963-07-09 11:37:51.858310000 14018982580880904208210215832465831.218 +1553794888 1646333132 vqc sey oje 1953-08-07 17:48:41.679550000 1963-05-31 09:22:00.597388000 11239763438289016347321495266302014.237 +-1403693312 -1216781788 qlg aug sey 1988-11-15 03:51:21.358694000 1987-11-03 10:51:18.290933000 1334981368642329549186415066204806.055 +-1323082802 215463808 bvq upk xje 1943-11-05 15:49:16.369757000 1939-03-04 17:50:10.430653000 -14332688794649442988256744477368672.288 +\N 214808438 rdx fau kfq 1965-10-14 23:59:24.858740000 1939-01-24 15:34:19.169731000 -17101523264299725420189140735343174.196 +122270194 1646595280 mhs vhc fau 1964-07-08 03:52:37.839097000 1963-06-26 10:52:34.771336000 15518605858229290285641280685341328.981 +1459683756 -1216257492 wrd qlw kfa 1992-08-01 10:20:58.403985000 1987-11-29 12:21:52.464881000 -138679632120876822845554550042834.29 +1540294266 -1216912862 eyt bvh grm 1977-12-28 18:32:42.286121000 1987-10-21 10:06:01.203959000 18365325889963959866603608787884378.231 +-1309451106 215070586 ito idx bvh 1940-03-03 10:04:56.411439000 1939-02-19 17:04:53.343679000 -12822680844359451481869355316303859.452 +28159062 214677364 sey mxs gbm 1968-03-24 10:22:14.884388000 1963-07-22 12:23:08.945284000 14060521589584154497486282008699204.922 +108769572 1646464206 aug wid sni 1953-08-20 18:33:58.766524000 1963-06-13 10:07:17.684362000 11281302446992266636597561442535387.941 +1553925962 -1216650714 upk eyk gbv 1988-11-28 04:36:38.445668000 1987-11-16 11:36:35.377907000 1376520377345579838462481242438179.759 +-1403562238 -1217043936 fau yto cni 1943-11-18 16:34:33.456731000 1939-03-17 18:35:27.517627000 
-14291149785946192698980678301135298.584 +-1322951728 214939512 vhc jey oju 1965-10-28 00:44:41.945714000 1939-02-06 16:19:36.256705000 -17059984255596475130913074559109800.492 +122401268 1646726354 qlw alg cwi 1964-07-21 04:37:54.926071000 1963-07-09 11:37:51.858310000 15560144866932540574917346861574702.685 +1459814830 1646333132 bvh upb oje 1992-08-14 11:06:15.490959000 1987-12-12 13:07:09.551855000 -97140623417626533569488373809460.586 +1540425340 -1216781788 idx fal kvq 1978-01-10 19:17:59.373095000 1987-11-03 10:51:18.290933000 18406864898667210155879674964117751.935 +-1309320032 215201660 mxs mhc xje 1940-03-16 10:50:13.498413000 1939-03-04 17:50:10.430653000 -12781141835656201192593289140070485.748 +28290136 214808438 wid qcw kfq 1968-04-06 11:07:31.971362000 1938-12-29 14:03:44.995783000 14102060598287404786762348184932578.626 +108900646 1646595280 eyk bmh wrm 1953-09-02 19:19:15.853498000 1963-06-26 10:52:34.771336000 11322841455695516925873627618768761.645 +1554057036 -1216519640 yto ido kfa 1988-12-11 05:21:55.532642000 1987-11-29 12:21:52.464881000 1418059386048830127738547418671553.463 +-1403431164 -1216912862 jey dxs grm 1943-12-01 17:19:50.543705000 1987-09-25 08:35:27.030011000 -14249610777242942409704612124901924.88 +-1322820654 215070586 alg nid sny 1965-11-10 01:29:59.032688000 1939-02-19 17:04:53.343679000 -17018445246893224841637008382876426.788 +122532342 1646857428 upb epk gbm 1964-08-03 05:23:12.013045000 1963-07-22 12:23:08.945284000 15601683875635790864193413037808076.389 +1459945904 1646464206 fal ytf sni 1992-08-27 11:51:32.577933000 1963-05-18 08:36:43.510414000 -55601614714376244293422197576086.882 +1540556414 -1216650714 mhc jep oau 1978-01-23 20:03:16.460069000 1987-11-16 11:36:35.377907000 18448403907370460445155741140351125.639 +-1309188958 215332734 qcw qlg cni 1940-03-29 11:35:30.585387000 1939-03-17 18:35:27.517627000 -12739602826952950903317222963837112.044 +28421210 214939512 bmh ugb oju 1968-04-19 11:52:49.058336000 1939-01-11 14:49:02.082757000 14143599606990655076038414361165952.33 +109031720 1646726354 ido fql bvq 1953-09-15 20:04:32.940472000 1963-07-09 11:37:51.858310000 11364380464398767215149693795002135.349 +1554188110 -1216388566 dxs mhs oje 1988-12-24 06:07:12.619616000 1987-12-12 13:07:09.551855000 1459598394752080417014613594904927.167 +-1403300090 -1216781788 nid hcw kvq 1943-12-14 18:05:07.630679000 1987-10-08 09:20:44.116985000 \N +-1322689580 215201660 epk rmh wrd 1965-11-23 02:15:16.119662000 1939-03-04 17:50:10.430653000 -14208071768539692120428545948668551.176 +122663416 1646988502 ytf ito kfq 1964-08-16 06:08:29.100019000 1938-12-29 14:03:44.995783000 -16976906238189974552360942206643053.084 +1460076978 1646595280 jep dxj wrm 1992-09-09 12:36:49.664907000 1963-05-31 09:22:00.597388000 15643222884339041153469479214041450.093 +1540687488 -1216519640 qlg nit sey 1978-02-05 20:48:33.547043000 1987-11-29 12:21:52.464881000 -14062606011125955017356021342713.178 +-1309057884 215463808 ugb upk grm 1940-04-11 12:20:47.672361000 1987-09-25 08:35:27.030011000 18489942916073710734431807316584499.343 +28552284 215070586 fql ykf sny 1968-05-02 12:38:06.145310000 1939-01-24 15:34:19.169731000 -12698063818249700614041156787603738.34 +109162794 1646857428 mhs jup fau 1953-09-28 20:49:50.027446000 1963-07-22 12:23:08.945284000 14185138615693905365314480537399326.034 +1554319184 -1216257492 hcw qlw sni 1989-01-06 06:52:29.706590000 1963-05-18 08:36:43.510414000 11405919473102017504425759971235509.053 +-1403169016 -1216650714 rmh lgb oau 1943-12-27 
18:50:24.717653000 1987-10-21 10:06:01.203959000 1501137403455330706290679771138300.871 +-1322558506 215332734 ito vql bvh 1965-12-06 03:00:33.206636000 1939-03-17 18:35:27.517627000 -14166532759836441831152479772435177.472 +122794490 214677364 dxj mxs oju 1964-08-29 06:53:46.186993000 1939-01-11 14:49:02.082757000 -16935367229486724263084876030409679.38 +1460208052 1646726354 nit hcn bvq 1992-09-22 13:22:06.751881000 1963-06-13 10:07:17.684362000 15684761893042291442745545390274823.797 +1540818562 -1216388566 upk rmx gbv 1978-02-18 21:33:50.634017000 1987-12-12 13:07:09.551855000 -21240171529866529632202202809594852.69 +-1308926810 -1217043936 ykf yto kvq 1940-04-24 13:06:04.759335000 1987-10-08 09:20:44.116985000 18531481924776961023707873492817873.047 +28683358 215201660 jup doj wrd 1968-05-15 13:23:23.232284000 1939-02-06 16:19:36.256705000 -12656524809546450324765090611370364.636 +109293868 1646988502 qlw nyt cwi 1953-10-11 21:35:07.114420000 1938-12-29 14:03:44.995783000 14226677624397155654590546713632699.738 +1554450258 1646333132 lgb upb wrm 1989-01-19 07:37:46.793564000 1963-05-31 09:22:00.597388000 11447458481805267793701826147468882.757 +-1403037942 -1216519640 vql pkf sey 1944-01-09 19:35:41.804627000 1987-11-03 10:51:18.290933000 1542676412158580995566745947371674.575 +-1322427432 215463808 mxs aup xje 1965-12-19 03:45:50.293610000 1987-09-25 08:35:27.030011000 -14124993751133191541876413596201803.768 +122925564 214808438 hcn qcw sny 1964-09-11 07:39:03.273967000 1939-01-24 15:34:19.169731000 -16893828220783473973808809854176305.676 +1460339126 1646857428 rmx lgr fau 1992-10-05 14:07:23.838855000 1963-06-26 10:52:34.771336000 15726300901745541732021611566508197.501 +1540949636 -1216257492 yto vqc kfa 1978-03-03 22:19:07.720991000 1963-05-18 08:36:43.510414000 -21198632521163279342926136633361478.986 +-1308795736 -1216912862 doj dxs oau 1940-05-07 13:51:21.846309000 1987-10-21 10:06:01.203959000 18573020933480211312983939669051246.751 +28814432 215332734 nyt hsn bvh 1968-05-28 14:08:40.319258000 1939-02-19 17:04:53.343679000 -12614985800843200035489024435136990.932 +109424942 214677364 upb rdx gbm 1953-10-24 22:20:24.201394000 1939-01-11 14:49:02.082757000 14268216633100405943866612889866073.442 +1554581332 1646464206 pkf ytf bvq 1989-02-01 08:23:03.880538000 1963-06-13 10:07:17.684362000 11488997490508518082977892323702256.461 +-1402906868 -1216388566 aup toj gbv 1944-01-22 20:20:58.891601000 1987-11-16 11:36:35.377907000 1584215420861831284842812123605048.279 +-1322296358 -1217043936 qcw eyt cni 1966-01-01 04:31:07.380583000 1987-10-08 09:20:44.116985000 -14083454742429941252600347419968430.064 +123056638 214939512 lgr ugb wrd 1964-09-24 08:24:20.360940000 1939-02-06 16:19:36.256705000 -16852289212080223684532743677942931.972 +1460470200 1646988502 vqc pkv cwi 1992-10-18 14:52:40.925829000 1963-07-09 11:37:51.858310000 15767839910448792021297677742741571.205 +1541080710 1646333132 dxs aug oje 1978-03-16 23:04:24.807965000 1963-05-31 09:22:00.597388000 -21157093512460029053650070457128105.282 +-1308664662 -1216781788 hsn hcw sey 1940-05-20 14:36:38.933283000 1987-11-03 10:51:18.290933000 18614559942183461602260005845284620.455 +28945506 215463808 rdx lwr xje 1968-06-10 14:53:57.406232000 1939-03-04 17:50:10.430653000 -12573446792139949746212958258903617.228 +109556016 214808438 ytf vhc kfq 1953-11-06 23:05:41.288368000 1939-01-24 15:34:19.169731000 14309755641803656233142679066099447.146 +1554712406 1646595280 toj dxj fau 1989-02-14 09:08:20.967512000 1963-06-26 
10:52:34.771336000 11530536499211768372253958499935630.165 +-1402775794 -1216257492 eyt xsn kfa 1944-02-04 21:06:15.978575000 1987-11-29 12:21:52.464881000 1625754429565081574118878299838421.983 +-1322165284 -1216912862 ugb idx grm 1966-01-14 05:16:24.467557000 1987-10-21 10:06:01.203959000 -14041915733726690963324281243735056.36 +123187712 215070586 pkv ykf bvh 1964-10-07 09:09:37.447914000 1939-02-19 17:04:53.343679000 -16810750203376973395256677501709558.268 +1460601274 214677364 aug toa gbm 1992-10-31 15:37:58.012803000 1963-07-22 12:23:08.945284000 15809378919152042310573743918974944.909 +1541211784 1646464206 hcw eyk sni 1978-03-29 23:49:41.894939000 1963-06-13 10:07:17.684362000 -21115554503756778764374004280894731.578 +-1308533588 -1216650714 lwr lgb gbv 1940-06-02 15:21:56.020257000 1987-11-16 11:36:35.377907000 18656098950886711891536072021517994.159 +29076580 -1217043936 vhc pbv cni 1968-06-23 15:39:14.493206000 1939-03-17 18:35:27.517627000 -12531907783436699456936892082670243.524 +109687090 214939512 dxj alg oju 1953-11-19 23:50:58.375342000 1939-02-06 16:19:36.256705000 14351294650506906522418745242332820.85 +1554843480 1646726354 xsn hcn cwi 1989-02-27 09:53:38.054486000 1963-07-09 11:37:51.858310000 11572075507915018661530024676169003.869 +-1402644720 1646333132 idx cwr oje 1944-02-17 21:51:33.065548000 1987-12-12 13:07:09.551855000 1667293438268331863394944476071795.687 +-1322034210 -1216781788 ykf mhc kvq 1966-01-27 06:01:41.554531000 1987-11-03 10:51:18.290933000 -14000376725023440674048215067501682.656 +123318786 215201660 toa doj xje 1964-10-20 09:54:54.534888000 1939-03-04 17:50:10.430653000 -16769211194673723105980611325476184.564 +1460732348 214808438 eyk xse kfq 1992-11-13 16:23:15.099777000 1938-12-29 14:03:44.995783000 15850917927855292599849810095208318.613 +1541342858 1646595280 lgb ido wrm 1978-04-12 00:34:58.981913000 1963-06-26 10:52:34.771336000 -21074015495053528475097938104661357.874 +-1308402514 -1216519640 pbv pkf kfa 1940-06-15 16:07:13.107231000 1987-11-29 12:21:52.464881000 18697637959589962180812138197751367.863 +29207654 -1216912862 alg tfa grm 1968-07-06 16:24:31.580180000 1987-09-25 08:35:27.030011000 -12490368774733449167660825906436869.82 +109818164 215070586 hcn epk sny 1953-12-03 00:36:15.462316000 1939-02-19 17:04:53.343679000 14392833659210156811694811418566194.554 +1554974554 1646857428 cwr lgr gbm 1989-03-12 10:38:55.141460000 1963-07-22 12:23:08.945284000 11613614516618268950806090852402377.573 +-1402513646 1646464206 mhc gbv sni 1944-03-01 22:36:50.152522000 1963-05-18 08:36:43.510414000 1708832446971582152671010652305169.391 +-1321903136 -1216650714 doj qlg oau 1966-02-09 06:46:58.641505000 1987-11-16 11:36:35.377907000 -13958837716320190384772148891268308.952 +123449860 215332734 xse hsn cni 1964-11-02 10:40:11.621862000 1939-03-17 18:35:27.517627000 -16727672185970472816704545149242810.86 +1460863422 214939512 ido cwi oju 1992-11-26 17:08:32.186751000 1939-01-11 14:49:02.082757000 15892456936558542889125876271441692.317 +1541473932 1646726354 pkf mhs bvq 1978-04-25 01:20:16.068887000 1963-07-09 11:37:51.858310000 -21032476486350278185821871928427984.17 +-1308271440 -1216388566 tfa toj oje 1940-06-28 16:52:30.194205000 1987-12-12 13:07:09.551855000 18739176968293212470088204373984741.567 +29338728 -1216781788 epk xje kvq 1968-07-19 17:09:48.667154000 1987-10-08 09:20:44.116985000 -12448829766030198878384759730203496.116 +109949238 215201660 lgr ito wrd 1953-12-16 01:21:32.549290000 1939-03-04 17:50:10.430653000 
14434372667913407100970877594799568.258 +1555105628 1646988502 gbv pkv kfq 1989-03-25 11:24:12.228434000 1938-12-29 14:03:44.995783000 11655153525321519240082157028635751.277 +-1402382572 1646595280 qlg kfa wrm 1944-03-14 23:22:07.239496000 1963-05-31 09:22:00.597388000 1750371455674832441947076828538543.095 +-1321772062 \N hsn upk sey 1966-02-22 07:32:15.728479000 1987-11-29 12:21:52.464881000 -13917298707616940095496082715034935.248 +123580934 -1216519640 cwi lwr grm 1964-11-15 11:25:28.708836000 1987-09-25 08:35:27.030011000 -16686133177267222527428478973009437.156 +1460994496 215463808 mhs gbm sny 1992-12-09 17:53:49.273725000 1939-01-24 15:34:19.169731000 15933995945261793178401942447675066.021 +1541605006 215070586 toj qlw fau 1978-05-08 02:05:33.155861000 1963-07-22 12:23:08.945284000 -20990937477647027896545805752194610.466 +-1308140366 1646857428 xje xsn sni 1940-07-11 17:37:47.281179000 1963-05-18 08:36:43.510414000 18780715976996462759364270550218115.271 +29469802 -1216257492 ito cni oau 1968-08-01 17:55:05.754128000 1987-10-21 10:06:01.203959000 -12407290757326948589108693553970122.412 +110080312 -1216650714 pkv mxs bvh 1953-12-29 02:06:49.636264000 1939-03-17 18:35:27.517627000 14475911676616657390246943771032941.962 +1555236702 215332734 kfa toa oju 1989-04-07 12:09:29.315407000 1939-01-11 14:49:02.082757000 11696692534024769529358223204869124.981 +-1402251498 214677364 upk oje bvq 1944-03-28 00:07:24.326470000 1963-06-13 10:07:17.684362000 1791910464378082731223143004771916.799 +-1321640988 1646726354 lwr yto gbv 1966-03-07 08:17:32.815453000 1987-12-12 13:07:09.551855000 -13875759698913689806220016538801561.544 +123712008 -1216388566 gbm pbv kvq 1964-11-28 12:10:45.795810000 1987-10-08 09:20:44.116985000 -16644594168563972238152412796776063.452 +1461125570 -1217043936 qlw kfq wrd 1992-12-22 18:39:06.360699000 1939-02-06 16:19:36.256705000 15975534953965043467678008623908439.725 +1541736080 215201660 xsn upb cwi 1978-05-21 02:50:50.242835000 1938-12-29 14:03:44.995783000 -20949398468943777607269739575961236.762 +-1308009292 1646988502 cni cwr wrm 1940-07-24 18:23:04.368153000 1963-05-31 09:22:00.597388000 18822254985699713048640336726451488.975 +29600876 1646333132 mxs grm sey 1968-08-14 18:40:22.841102000 1987-11-03 10:51:18.290933000 -12365751748623698299832627377736748.708 +110211386 -1216519640 toa qcw xje 1954-01-11 02:52:06.723238000 1987-09-25 08:35:27.030011000 14517450685319907679523009947266315.666 +1555367776 215463808 oje xse sny 1989-04-20 12:54:46.402381000 1939-01-24 15:34:19.169731000 11738231542728019818634289381102498.685 +-1402120424 214808438 yto sni fau 1944-04-10 00:52:41.413444000 1963-06-26 10:52:34.771336000 1833449473081333020499209181005290.503 +-1321509914 1646857428 pbv dxs kfa 1966-03-20 09:02:49.902427000 1963-05-18 08:36:43.510414000 -13834220690210439516943950362568187.84 +123843082 -1216257492 kfq tfa oau 1964-12-11 12:56:02.882784000 1987-10-21 10:06:01.203959000 -16603055159860721948876346620542689.748 +1461256644 -1216912862 upb oju bvh 1993-01-04 19:24:23.447673000 1939-02-19 17:04:53.343679000 16017073962668293756954074800141813.429 +1541867154 215332734 cwr ytf gbm 1978-06-03 03:36:07.329809000 1939-01-11 14:49:02.082757000 -20907859460240527317993673399727863.058 +-1307878218 214677364 grm gbv bvq 1940-08-06 19:08:21.455127000 1963-06-13 10:07:17.684362000 18863793994402963337916402902684862.679 +29731950 1646464206 qcw kvq gbv 1968-08-27 19:25:39.928076000 1987-11-16 11:36:35.377907000 -12324212739920448010556561201503375.004 
+110342460 -1216388566 xse ugb cni 1954-01-24 03:37:23.810212000 1987-10-08 09:20:44.116985000 14558989694023157968799076123499689.37 +1555498850 -1217043936 sni cwi wrd 1989-05-03 13:40:03.489355000 1939-02-06 16:19:36.256705000 11779770551431270107910355557335872.389 +-1401989350 214939512 dxs wrm cwi 1944-04-23 01:37:58.500418000 1963-07-09 11:37:51.858310000 1874988481784583309775275357238664.207 +-1321378840 1646988502 tfa hcw oje 1966-04-02 09:48:06.989401000 1963-05-31 09:22:00.597388000 -13792681681507189227667884186334814.136 +123974156 1646333132 oju xje sey 1964-12-24 13:41:19.969758000 1987-11-03 10:51:18.290933000 -16561516151157471659600280444309316.044 +1461387718 -1216781788 ytf sny xje 1993-01-17 20:09:40.534647000 1939-03-04 17:50:10.430653000 16058612971371544046230140976375187.133 +\N 215463808 gbv dxj kfq 1978-06-16 04:21:24.416783000 1939-01-24 15:34:19.169731000 -20866320451537277028717607223494489.354 +1541998228 214808438 kvq kfa fau 1940-08-19 19:53:38.542101000 1963-06-26 10:52:34.771336000 18905333003106213627192469078918236.383 +-1307747144 1646595280 ugb oau kfa 1968-09-09 20:10:57.015050000 1987-11-29 12:21:52.464881000 -12282673731217197721280495025270001.3 +29863024 -1216257492 cwi ykf grm 1954-02-06 04:22:40.897186000 1987-10-21 10:06:01.203959000 14600528702726408258075142299733063.074 +110473534 -1216912862 wrm gbm bvh 1989-05-16 14:25:20.576329000 1939-02-19 17:04:53.343679000 11821309560134520397186421733569246.093 +1555629924 215070586 hcw bvq gbm 1944-05-06 02:23:15.587392000 1963-07-22 12:23:08.945284000 1916527490487833599051341533472037.911 +-1401858276 214677364 xje lgb sni 1966-04-15 10:33:24.076375000 1963-06-13 10:07:17.684362000 -13751142672803938938391818010101440.432 +-1321247766 1646464206 sny cni gbv 1965-01-06 14:26:37.056732000 1987-11-16 11:36:35.377907000 -16519977142454221370324214268075942.34 +124105230 -1216650714 dxj wrd cni 1993-01-30 20:54:57.621621000 1939-03-17 18:35:27.517627000 16100151980074794335506207152608560.837 +1461518792 -1217043936 kfa hcn oju 1978-06-29 05:06:41.503757000 1939-02-06 16:19:36.256705000 -20824781442834026739441541047261115.65 +1542129302 214939512 oau oje cwi 1940-09-01 20:38:55.629075000 1963-07-09 11:37:51.858310000 18946872011809463916468535255151610.087 +-1307616070 1646726354 ykf sey oje 1968-09-22 20:56:14.102024000 1987-12-12 13:07:09.551855000 -12241134722513947432004428849036627.596 +29994098 1646333132 gbm doj kvq 1954-02-19 05:07:57.984160000 1987-11-03 10:51:18.290933000 14642067711429658547351208475966436.778 +110604608 -1216781788 bvq kfq xje 1989-05-29 15:10:37.663303000 1939-03-04 17:50:10.430653000 11862848568837770686462487909802619.797 +1555760998 215201660 lgb fau kfq 1944-05-19 03:08:32.674366000 1938-12-29 14:03:44.995783000 1958066499191083888327407709705411.615 +-1401727202 214808438 cni pkf wrm 1966-04-28 11:18:41.163349000 1963-06-26 10:52:34.771336000 -13709603664100688649115751833868066.728 +-1321116692 1646595280 wrd grm kfa 1965-01-19 15:11:54.143706000 1987-11-29 12:21:52.464881000 -16478438133750971081048148091842568.636 +124236304 -1216519640 hcn bvh grm 1993-02-12 21:40:14.708595000 1987-09-25 08:35:27.030011000 16141690988778044624782273328841934.541 +1461649866 -1216912862 oje lgr sny 1978-07-12 05:51:58.590731000 1939-02-19 17:04:53.343679000 -20783242434130776450165474871027741.946 +1542260376 215070586 sey sni gbm 1940-09-14 21:24:12.716049000 1963-07-22 12:23:08.945284000 18988411020512714205744601431384983.791 +-1307484996 1646857428 doj wid sni 1968-10-05 
21:41:31.188998000 1963-05-18 08:36:43.510414000 -12199595713810697142728362672803253.892 +30125172 1646464206 kfq hsn oau 1954-03-04 05:53:15.071133000 1987-11-16 11:36:35.377907000 14683606720132908836627274652199810.482 +110735682 -1216650714 fau oju cni 1989-06-11 15:55:54.750277000 1939-03-17 18:35:27.517627000 11904387577541020975738554086035993.501 +1555892072 215332734 pkf jey oju 1944-06-01 03:53:49.761340000 1939-01-11 14:49:02.082757000 1999605507894334177603473885938785.319 +-1401596128 214939512 grm \N bvq 1966-05-11 12:03:58.250323000 1963-07-09 11:37:51.858310000 -13668064655397438359839685657634693.024 +-1320985618 1646726354 bvh toj oje 1965-02-01 15:57:11.230680000 1987-12-12 13:07:09.551855000 -16436899125047720791772081915609194.932 +124367378 -1216388566 lgr kvq kvq 1993-02-25 22:25:31.795569000 1987-10-08 09:20:44.116985000 16183229997481294914058339505075308.245 +1461780940 -1216781788 sni fal wrd 1978-07-25 06:37:15.677705000 1939-03-04 17:50:10.430653000 -20741703425427526160889408694794368.242 +1542391450 215201660 wid pkv kfq 1940-09-27 22:09:29.803023000 1938-12-29 14:03:44.995783000 19029950029215964495020667607618357.495 +-1307353922 1646988502 hsn wrm wrm 1968-10-18 22:26:48.275971000 1963-05-31 09:22:00.597388000 -12158056705107446853452296496569880.188 +30256246 1646595280 oju bmh sey 1954-03-17 06:38:32.158107000 1987-11-29 12:21:52.464881000 14725145728836159125903340828433184.186 +110866756 -1216519640 jey lwr grm 1989-06-24 16:41:11.837251000 1987-09-25 08:35:27.030011000 11945926586244271265014620262269367.205 +1556023146 215463808 toj sny sny 1944-06-14 04:39:06.848314000 1939-01-24 15:34:19.169731000 2041144516597584466879540062172159.023 +-1401465054 215070586 kvq nid fau 1966-05-24 12:49:15.337297000 1963-07-22 12:23:08.945284000 -13626525646694188070563619481401319.32 +-1320854544 1646857428 fal xsn sni 1965-02-14 16:42:28.317654000 1963-05-18 08:36:43.510414000 -16395360116344470502496015739375821.228 +124498452 -1216257492 pkv oau oau 1993-03-10 23:10:48.882543000 1987-10-21 10:06:01.203959000 16224769006184545203334405681308681.949 +1461912014 -1216650714 wrm jep bvh 1978-08-07 07:22:32.764679000 1939-03-17 18:35:27.517627000 -20700164416724275871613342518560994.538 +1542522524 215332734 bmh toa oju 1940-10-10 22:54:46.889997000 1939-01-11 14:49:02.082757000 19071489037919214784296733783851731.199 +-1307222848 214677364 lwr bvq bvq 1968-10-31 23:12:05.362945000 1963-06-13 10:07:17.684362000 -12116517696404196564176230320336506.484 +30387320 1646726354 sny fql \N 1954-03-30 07:23:49.245081000 1987-12-12 13:07:09.551855000 14766684737539409415179407004666557.89 +110997830 -1216388566 nid pbv gbv 1989-07-07 17:26:28.924225000 1987-10-08 09:20:44.116985000 11987465594947521554290686438502740.909 +\N -1217043936 xsn wrd kvq 1944-06-27 05:24:23.935288000 1939-02-06 16:19:36.256705000 2082683525300834756155606238405532.727 +1556154220 215201660 oau upb wrd 1966-06-06 13:34:32.424271000 1938-12-29 14:03:44.995783000 -13584986637990937781287553305167945.616 +-1401333980 1646988502 jep cwr cwi 1965-02-27 17:27:45.404628000 1963-05-31 09:22:00.597388000 -16353821107641220213219949563142447.524 +-1320723470 1646333132 toa sey wrm 1993-03-23 23:56:05.969517000 1987-11-03 10:51:18.290933000 16266308014887795492610471857542055.653 +124629526 -1216519640 bvq qcw sey 1978-08-20 08:07:49.851653000 1987-09-25 08:35:27.030011000 -20658625408021025582337276342327620.834 +1462043088 215463808 fql xse xje 1940-10-23 23:40:03.976971000 1939-01-24 15:34:19.169731000 
19113028046622465073572799960085104.903 +1542653598 214808438 pbv fau sny 1968-11-13 23:57:22.449919000 1963-06-26 10:52:34.771336000 -12074978687700946274900164144103132.78 +-1307091774 1646857428 wrd dxs fau 1954-04-12 08:09:06.332055000 1963-05-18 08:36:43.510414000 14808223746242659704455473180899931.594 +30518394 -1216257492 rmh tfa kfa 1989-07-20 18:11:46.011199000 1987-10-21 10:06:01.203959000 12029004603650771843566752614736114.613 +111128904 -1216912862 cwr bvh oau 1944-07-10 06:09:41.022262000 1939-02-19 17:04:53.343679000 2124222534004085045431672414638906.431 +1556285294 215332734 sey ytf bvh 1966-06-19 14:19:49.511245000 1939-01-11 14:49:02.082757000 -13543447629287687492011487128934571.912 +-1401202906 214677364 nit gbv gbm 1965-03-12 18:13:02.491602000 1963-06-13 10:07:17.684362000 -16312282098937969923943883386909073.82 +-1320592396 1646464206 xse wid bvq 1993-04-06 00:41:23.056491000 1987-11-16 11:36:35.377907000 16307847023591045781886538033775429.357 +124760600 -1216388566 fau ugb gbv 1978-09-02 08:53:06.938627000 1987-10-08 09:20:44.116985000 -20617086399317775293061210166094247.13 +1462174162 -1217043936 jup cwi cni 1940-11-06 00:25:21.063945000 1939-02-06 16:19:36.256705000 19154567055325715362848866136318478.607 +1542784672 214939512 tfa jey wrd 1968-11-27 00:42:39.536893000 1963-07-09 11:37:51.858310000 -12033439678997695985624097967869759.076 +-1306960700 1646988502 bvh hcw cwi 1954-04-25 08:54:23.419029000 1963-05-31 09:22:00.597388000 14849762754945909993731539357133305.298 +30649468 1646333132 vql xje oje 1989-08-02 18:57:03.098173000 1987-11-03 10:51:18.290933000 12070543612354022132842818790969488.317 +111259978 -1216781788 gbv fal sey 1944-07-23 06:54:58.109236000 1939-03-04 17:50:10.430653000 2165761542707335334707738590872280.135 +1556416368 215463808 wid dxj xje 1966-07-02 15:05:06.598219000 1939-01-24 15:34:19.169731000 -13501908620584437202735420952701198.208 +-1401071832 214808438 rmx kfa kfq 1965-03-25 18:58:19.578576000 1963-06-26 10:52:34.771336000 -16270743090234719634667817210675700.116 +-1320461322 1646595280 cwi bmh fau 1993-04-19 01:26:40.143465000 1987-11-29 12:21:52.464881000 16349386032294296071162604210008803.061 +124891674 -1216257492 jey ykf kfa 1978-09-15 09:38:24.025600000 1987-10-21 10:06:01.203959000 -20575547390614525003785143989860873.426 +1462305236 -1216912862 nyt gbm grm 1940-11-19 01:10:38.150918000 1939-02-19 17:04:53.343679000 19196106064028965652124932312551852.311 +1542915746 215070586 xje nid bvh 1968-12-10 01:27:56.623867000 1963-07-22 12:23:08.945284000 -11991900670294445696348031791636385.372 +-1306829626 214677364 fal lgb gbm 1954-05-08 09:39:40.506003000 1963-06-13 10:07:17.684362000 14891301763649160283007605533366679.002 +30780542 1646464206 aup cni sni 1989-08-15 19:42:20.185147000 1987-11-16 11:36:35.377907000 12112082621057272422118884967202862.021 +111391052 \N kfa jep gbv 1944-08-05 07:40:15.196210000 1939-03-17 18:35:27.517627000 2207300551410585623983804767105653.839 +1556547442 -1216650714 bmh hcn cni 1966-07-15 15:50:23.685193000 1939-02-06 16:19:36.256705000 -13460369611881186913459354776467824.504 +-1400940758 -1217043936 vqc oje oju 1965-04-07 19:43:36.665550000 1963-07-09 11:37:51.858310000 -16229204081531469345391751034442326.412 +-1320330248 214939512 gbm fql cwi 1993-05-02 02:11:57.230438000 1987-12-12 13:07:09.551855000 16390925040997546360438670386242176.765 +125022748 1646726354 nid doj oje 1978-09-28 10:23:41.112574000 1987-11-03 10:51:18.290933000 -20534008381911274714509077813627499.722 
+1462436310 1646333132 rdx kfq kvq 1940-12-02 01:55:55.237892000 1939-03-04 17:50:10.430653000 19237645072732215941400998488785226.015 +1543046820 -1216781788 cni rmh xje 1968-12-23 02:13:13.710841000 1938-12-29 14:03:44.995783000 -11950361661591195407071965615403011.668 +-1306698552 215201660 jep pkf kfq 1954-05-21 10:24:57.592977000 1963-06-26 10:52:34.771336000 14932840772352410572283671709600052.706 +30911616 214808438 eyt grm wrm 1989-08-28 20:27:37.272121000 1987-11-29 12:21:52.464881000 12153621629760522711394951143436235.725 +111522126 1646595280 oje nit kfa 1944-08-18 08:25:32.283184000 1987-09-25 08:35:27.030011000 2248839560113835913259870943339027.543 +1556678516 -1216519640 fql lgr grm 1966-07-28 16:35:40.772167000 1939-02-19 17:04:53.343679000 -13418830603177936624183288600234450.8 +-1400809684 -1216912862 aug sni sny 1965-04-20 20:28:53.752524000 1963-07-22 12:23:08.945284000 -16187665072828219056115684858208952.708 +-1320199174 215070586 kfq jup gbm 1993-05-15 02:57:14.317412000 1963-05-18 08:36:43.510414000 16432464049700796649714736562475550.469 +125153822 1646857428 rmh hsn sni 1978-10-11 11:08:58.199548000 1987-11-16 11:36:35.377907000 -20492469373208024425233011637394126.018 +1462567384 1646464206 vhc oju oau 1940-12-15 02:41:12.324866000 1939-03-17 18:35:27.517627000 19279184081435466230677064665018599.719 +1543177894 -1216650714 grm vql cni 1969-01-05 02:58:30.797815000 1939-01-11 14:49:02.082757000 -11908822652887945117795899439169637.964 +-1306567478 215332734 nit toj oju 1954-06-03 11:10:14.679951000 1963-07-09 11:37:51.858310000 14974379781055660861559737885833426.41 +31042690 214939512 idx kvq bvq 1989-09-10 21:12:54.359095000 1987-12-12 13:07:09.551855000 12195160638463773000671017319669609.429 +111653200 1646726354 sni rmx oje 1944-08-31 09:10:49.370158000 1987-10-08 09:20:44.116985000 2290378568817086202535937119572401.247 +1556809590 -1216388566 jup pkv kvq 1966-08-10 17:20:57.859141000 1939-03-04 17:50:10.430653000 -13377291594474686334907222424001077.096 +-1400678610 -1216781788 eyk wrm wrd 1965-05-03 21:14:10.839498000 1938-12-29 14:03:44.995783000 -16146126064124968766839618681975579.004 +-1320068100 215201660 oju nyt kfq 1993-05-28 03:42:31.404386000 1963-05-31 09:22:00.597388000 16474003058404046938990802738708924.173 +125284896 1646988502 vql lwr wrm 1978-10-24 11:54:15.286522000 1987-11-29 12:21:52.464881000 -20450930364504774135956945461160752.314 +1462698458 1646595280 alg sny sey 1940-12-28 03:26:29.411840000 1987-09-25 08:35:27.030011000 19320723090138716519953130841251973.423 +1543308968 -1216519640 kvq aup grm 1969-01-18 03:43:47.884789000 1939-01-24 15:34:19.169731000 -11867283644184694828519833262936264.26 +-1306436404 215463808 rmx xsn sny 1954-06-16 11:55:31.766925000 1963-07-22 12:23:08.945284000 15015918789758911150835804062066800.114 +31173764 215070586 mhc oau fau 1989-09-23 21:58:11.446069000 1963-05-18 08:36:43.510414000 12236699647167023289947083495902983.133 +111784274 1646857428 wrm vqc sni 1944-09-13 09:56:06.457132000 1987-10-21 10:06:01.203959000 2331917577520336491812003295805774.951 +1556940664 -1216257492 nyt toa oau 1966-08-23 18:06:14.946115000 1939-03-17 18:35:27.517627000 -13335752585771436045631156247767703.392 +-1400547536 -1216650714 ido bvq bvh 1965-05-16 21:59:27.926472000 1939-01-11 14:49:02.082757000 -16104587055421718477563552505742205.3 +-1319937026 215332734 sny rdx oju 1993-06-10 04:27:48.491360000 1963-06-13 10:07:17.684362000 16515542067107297228266868914942297.877 +125415970 214677364 aup pbv bvq 1978-11-06 
12:39:32.373496000 1987-12-12 13:07:09.551855000 -20409391355801523846680879284927378.61 +1462829532 1646726354 epk wrd gbv 1941-01-10 04:11:46.498814000 1987-10-08 09:20:44.116985000 19362262098841966809229197017485347.127 +1543440042 -1216388566 oau eyt kvq 1969-01-31 04:29:04.971763000 1939-02-06 16:19:36.256705000 -11825744635481444539243767086702890.556 +-1306305330 -1217043936 vqc cwr wrd 1954-06-29 12:40:48.853899000 1938-12-29 14:03:44.995783000 15057457798462161440111870238300173.818 +31304838 215201660 qlg sey cwi 1989-10-06 22:43:28.533043000 1963-05-31 09:22:00.597388000 12278238655870273579223149672136356.837 +111915348 1646988502 bvq aug wrm 1944-09-26 10:41:23.544106000 1987-11-03 10:51:18.290933000 2373456586223586781088069472039148.655 +1557071738 1646333132 rdx xse sey 1966-09-05 18:51:32.033089000 1987-09-25 08:35:27.030011000 -13294213577068185756355090071534329.688 +-1400416462 -1216519640 mhs fau xje 1965-05-29 22:44:45.013446000 1939-01-24 15:34:19.169731000 -16063048046718468188287486329508831.596 +-1319805952 215463808 wrd vhc sny 1993-06-23 05:13:05.578334000 1963-06-26 10:52:34.771336000 16557081075810547517542935091175671.581 +125547044 214808438 eyt tfa fau 1978-11-19 13:24:49.460470000 1963-05-18 08:36:43.510414000 -20367852347098273557404813108694004.906 +1462960606 1646857428 ito bvh kfa 1941-01-23 04:57:03.585788000 1987-10-21 10:06:01.203959000 19403801107545217098505263193718720.831 +\N -1216257492 sey idx oau 1969-02-13 05:14:22.058737000 1939-02-19 17:04:53.343679000 -11784205626778194249967700910469516.852 +1543571116 -1216912862 aug gbv bvh 1954-07-12 13:26:05.940873000 1939-01-11 14:49:02.082757000 15098996807165411729387936414533547.522 +-1306174256 215332734 upk wid gbm 1989-10-19 23:28:45.620017000 1963-06-13 10:07:17.684362000 12319777664573523868499215848369730.541 +31435912 214677364 fau \N bvq 1944-10-09 11:26:40.631080000 1987-11-16 11:36:35.377907000 2414995594926837070364135648272522.359 +112046422 1646464206 vhc eyk gbv 1966-09-18 19:36:49.120063000 1987-10-08 09:20:44.116985000 -13252674568364935467079023895300955.984 +1557202812 -1216388566 qlw cwi cni 1965-06-11 23:30:02.100420000 1939-02-06 16:19:36.256705000 -16021509038015217899011420153275457.892 +\N -1217043936 bvh jey wrd 1993-07-06 05:58:22.665308000 1963-07-09 11:37:51.858310000 16598620084513797806819001267409045.285 +-1400285388 214939512 idx alg cwi 1978-12-02 14:10:06.547444000 1963-05-31 09:22:00.597388000 -20326313338395023268128746932460631.202 +-1319674878 1646988502 mxs xje oje 1941-02-05 05:42:20.672762000 1987-11-03 10:51:18.290933000 19445340116248467387781329369952094.535 +125678118 1646333132 wid fal sey 1969-02-26 05:59:39.145711000 1939-03-04 17:50:10.430653000 -11742666618074943960691634734236143.148 +1463091680 -1216781788 eyk mhc xje 1954-07-25 14:11:23.027847000 1939-01-24 15:34:19.169731000 15140535815868662018664002590766921.226 +1543702190 215463808 yto kfa kfq 1989-11-02 00:14:02.706991000 1963-06-26 10:52:34.771336000 12361316673276774157775282024603104.245 +\N 214808438 jey bmh fau 1944-10-22 12:11:57.718054000 1987-11-29 12:21:52.464881000 2456534603630087359640201824505896.063 +-1306043182 1646595280 alg ido kfa 1966-10-01 20:22:06.207037000 1987-10-21 10:06:01.203959000 -13211135559661685177802957719067582.28 +31566986 -1216257492 upb gbm grm 1965-06-25 00:15:19.187394000 1939-02-19 17:04:53.343679000 -15979970029311967609735353977042084.188 +112177496 -1216912862 fal nid bvh 1993-07-19 06:43:39.752282000 1963-07-22 12:23:08.945284000 
16640159093217048096095067443642418.989 +1557333886 215070586 mhc epk gbm 1978-12-15 14:55:23.634418000 1963-06-13 10:07:17.684362000 -20284774329691772978852680756227257.498 +-1400154314 214677364 qcw cni sni 1941-02-18 06:27:37.759736000 1987-11-16 11:36:35.377907000 19486879124951717677057395546185468.239 +\N 1646464206 bmh jep gbv 1969-03-11 06:44:56.232685000 1939-03-17 18:35:27.517627000 -11701127609371693671415568558002769.444 +-1319543804 -1216650714 ido qlg cni 1954-08-07 14:56:40.114821000 1939-02-06 16:19:36.256705000 15182074824571912307940068767000294.93 +125809192 -1217043936 dxs oje oju 1989-11-15 00:59:19.793965000 1963-07-09 11:37:51.858310000 12402855681980024447051348200836477.949 +1463222754 214939512 nid fql cwi 1944-11-04 12:57:14.805028000 1987-12-12 13:07:09.551855000 2498073612333337648916268000739269.767 +1543833264 1646726354 epk mhs oje 1966-10-14 21:07:23.294011000 \N -13169596550958434888526891542834208.576 +-1305912108 1646333132 ytf kfq kvq 1965-07-08 01:00:36.274368000 1987-11-03 10:51:18.290933000 -15938431020608717320459287800808710.484 +\N -1216781788 jep rmh xje 1993-08-01 07:28:56.839256000 1939-03-04 17:50:10.430653000 16681698101920298385371133619875792.693 +31698060 215201660 qlg ito kfq 1978-12-28 15:40:40.721392000 1938-12-29 14:03:44.995783000 -20243235320988522689576614579993883.794 +112308570 214808438 ugb grm wrm 1941-03-03 07:12:54.846710000 1963-06-26 10:52:34.771336000 19528418133654967966333461722418841.943 +1557464960 1646595280 fql nit kfa 1969-03-24 07:30:13.319659000 1987-11-29 12:21:52.464881000 -11659588600668443382139502381769395.74 +-1400023240 -1216519640 mhs upk grm 1954-08-20 15:41:57.201795000 1987-09-25 08:35:27.030011000 15223613833275162597216134943233668.634 +-1319412730 -1216912862 hcw sni sny 1989-11-28 01:44:36.880939000 1939-02-19 17:04:53.343679000 12444394690683274736327414377069851.653 +\N 215070586 rmh jup gbm 1944-11-17 13:42:31.892002000 1963-07-22 12:23:08.945284000 2539612621036587938192334176972643.471 +125940266 1646857428 ito qlw sni 1966-10-27 21:52:40.380984000 1963-05-18 08:36:43.510414000 -13128057542255184599250825366600834.872 +1463353828 1646464206 dxj oju oau 1965-07-21 01:45:53.361341000 1987-11-16 11:36:35.377907000 -15896892011905467031183221624575336.78 +1543964338 -1216650714 nit vql cni 1993-08-14 08:14:13.926230000 1939-03-17 18:35:27.517627000 16723237110623548674647199796109166.397 +-1305781034 215332734 upk mxs oju 1979-01-10 16:25:57.808366000 1939-01-11 14:49:02.082757000 -20201696312285272400300548403760510.09 +31829134 214939512 ykf kvq bvq 1941-03-16 07:58:11.933684000 1963-07-09 11:37:51.858310000 19569957142358218255609527898652215.647 +\N 1646726354 jup rmx oje 1969-04-06 08:15:30.406633000 1987-12-12 13:07:09.551855000 -11618049591965193092863436205536022.036 +112439644 -1216388566 qlw yto kvq 1954-09-02 16:27:14.288769000 1987-10-08 09:20:44.116985000 15265152841978412886492201119467042.338 +1557596034 -1216781788 lgb wrm wrd 1989-12-11 02:29:53.967913000 1939-03-04 17:50:10.430653000 12485933699386525025603480553303225.357 +-1399892166 215201660 vql nyt kfq 1944-11-30 14:27:48.978976000 1938-12-29 14:03:44.995783000 2581151629739838227468400353206017.175 +-1319281656 1646988502 mxs upb wrm 1966-11-09 22:37:57.467958000 1963-05-31 09:22:00.597388000 -13086518533551934309974759190367461.168 +126071340 1646595280 hcn sny sey 1965-08-03 02:31:10.448315000 1987-11-29 12:21:52.464881000 -15855353003202216741907155448341963.076 +\N -1216519640 rmx aup grm 1993-08-27 08:59:31.013204000 
1987-09-25 08:35:27.030011000 16764776119326798963923265972342540.101 +1463484902 215463808 yto qcw sny 1979-01-23 17:11:14.895340000 1939-01-24 15:34:19.169731000 -20160157303582022111024482227527136.386 +1544095412 215070586 doj oau fau 1941-03-29 08:43:29.020658000 1963-07-22 12:23:08.945284000 19611496151061468544885594074885589.351 +-1305649960 1646857428 nyt vqc sni 1969-04-19 09:00:47.493607000 1963-05-18 08:36:43.510414000 -11576510583261942803587370029302648.332 +31960208 -1216257492 upb dxs oau 1954-09-15 17:12:31.375743000 1987-10-21 10:06:01.203959000 15306691850681663175768267295700416.042 +112570718 -1216650714 pkf bvq bvh 1989-12-24 03:15:11.054887000 1939-03-17 18:35:27.517627000 12527472708089775314879546729536599.061 +\N 215332734 aup rdx oju 1944-12-13 15:13:06.065949000 1939-01-11 14:49:02.082757000 2622690638443088516744466529439390.879 +1557727108 214677364 qcw ytf bvq 1966-11-22 23:23:14.554932000 1963-06-13 10:07:17.684362000 -13044979524848684020698693014134087.464 +-1399761092 1646726354 lgr wrd gbv 1965-08-16 03:16:27.535289000 1987-12-12 13:07:09.551855000 -15813813994498966452631089272108589.372 +-1319150582 -1216388566 vqc eyt kvq 1993-09-09 09:44:48.100178000 1987-10-08 09:20:44.116985000 16806315128030049253199332148575913.805 +126202414 -1217043936 dxs ugb wrd 1979-02-05 17:56:31.982314000 1939-02-06 16:19:36.256705000 -20118618294878771821748416051293762.682 +1463615976 215201660 hsn sey cwi 1941-04-11 09:28:46.107632000 1938-12-29 14:03:44.995783000 19653035159764718834161660251118963.055 +\N 1646988502 rdx aug wrm 1969-05-02 09:46:04.580581000 1963-05-31 09:22:00.597388000 -11534971574558692514311303853069274.628 +1544226486 1646333132 ytf hcw sey 1954-09-28 17:57:48.462717000 1987-11-03 10:51:18.290933000 15348230859384913465044333471933789.746 +-1305518886 -1216519640 toj fau xje 1990-01-06 04:00:28.141861000 1987-09-25 08:35:27.030011000 12569011716793025604155612905769972.765 +32091282 215463808 eyt vhc sny 1944-12-26 15:58:23.152923000 1939-01-24 15:34:19.169731000 2664229647146338806020532705672764.583 +112701792 214808438 ugb dxj fau 1966-12-06 00:08:31.641906000 1963-06-26 10:52:34.771336000 -13003440516145433731422626837900713.76 +1557858182 1646857428 pkv bvh kfa 1965-08-29 04:01:44.622263000 1963-05-18 08:36:43.510414000 -15772274985795716163355023095875215.668 +\N -1216257492 aug idx oau 1993-09-22 10:30:05.187152000 1987-10-21 10:06:01.203959000 16847854136733299542475398324809287.509 +-1399630018 -1216912862 hcw ykf bvh 1979-02-18 18:41:49.069288000 1939-02-19 17:04:53.343679000 -20077079286175521532472349875060388.978 +-1319019508 215332734 lwr wid gbm 1941-04-24 10:14:03.194606000 1939-01-11 14:49:02.082757000 19694574168467969123437726427352336.759 +126333488 214677364 vhc eyk bvq 1969-05-15 10:31:21.667555000 1963-06-13 10:07:17.684362000 -11493432565855442225035237676835900.924 +1463747050 1646464206 dxj lgb gbv 1954-10-11 18:43:05.549691000 1987-11-16 11:36:35.377907000 15389769868088163754320399648167163.45 +1544357560 -1216388566 xsn jey cni 1990-01-19 04:45:45.228835000 1987-10-08 09:20:44.116985000 12610550725496275893431679082003346.469 +\N -1217043936 idx alg wrd 1945-01-08 16:43:40.239897000 1939-02-06 16:19:36.256705000 2705768655849589095296598881906138.287 +-1305387812 214939512 ykf hcn cwi 1966-12-19 00:53:48.728880000 1963-07-09 11:37:51.858310000 -12961901507442183442146560661667340.056 +32222356 1646988502 \N fal oje 1965-09-11 04:47:01.709237000 1963-05-31 09:22:00.597388000 -15730735977092465874078956919641841.964 
+112832866 1646333132 toa mhc sey 1993-10-05 11:15:22.274126000 1987-11-03 10:51:18.290933000 16889393145436549831751464501042661.213 +1557989256 -1216781788 eyk doj xje 1979-03-03 19:27:06.156262000 1939-03-04 17:50:10.430653000 -20035540277472271243196283698827015.274 +-1399498944 215463808 lgb bmh kfq 1941-05-07 10:59:20.281580000 1939-01-24 15:34:19.169731000 19736113177171219412713792603585710.463 +\N 214808438 pbv ido fau 1969-05-28 11:16:38.754529000 1963-06-26 10:52:34.771336000 -11451893557152191935759171500602527.22 +-1318888434 1646595280 alg pkf kfa 1954-10-24 19:28:22.636665000 1987-11-29 12:21:52.464881000 15431308876791414043596465824400537.154 +126464562 -1216257492 hcn nid grm 1990-02-01 05:31:02.315808000 1987-10-21 10:06:01.203959000 12652089734199526182707745258236720.173 +1463878124 -1216912862 cwr epk bvh 1945-01-21 17:28:57.326871000 1939-02-19 17:04:53.343679000 2747307664552839384572665058139511.991 +1544488634 215070586 mhc lgr gbm 1967-01-01 01:39:05.815854000 1963-07-22 12:23:08.945284000 -12920362498738933152870494485433966.352 +-1305256738 214677364 doj jep sni 1965-09-24 05:32:18.796211000 1963-06-13 10:07:17.684362000 -15689196968389215584802890743408468.26 +\N 1646464206 xse qlg gbv 1993-10-18 12:00:39.361100000 1987-11-16 11:36:35.377907000 16930932154139800121027530677276034.917 +32353430 -1216650714 ido hsn cni 1979-03-16 20:12:23.243236000 1939-03-17 18:35:27.517627000 -19994001268769020953920217522593641.57 +112963940 -1217043936 pkf fql oju 1941-05-20 11:44:37.368554000 1939-02-06 16:19:36.256705000 19777652185874469701989858779819084.167 +1558120330 214939512 tfa mhs cwi 1969-06-10 12:01:55.841503000 1963-07-09 11:37:51.858310000 -11410354548448941646483105324369153.516 +-1399367870 1646726354 epk toj oje 1954-11-06 20:13:39.723639000 1987-12-12 13:07:09.551855000 15472847885494664332872532000633910.858 +-1318757360 1646333132 lgr rmh kvq 1990-02-14 06:16:19.402782000 1987-11-03 10:51:18.290933000 12693628742902776471983811434470093.877 +\N -1216781788 gbv ito xje 1945-02-03 18:14:14.413845000 1939-03-04 17:50:10.430653000 2788846673256089673848731234372885.695 +126595636 215201660 qlg pkv kfq 1967-01-14 02:24:22.902828000 1938-12-29 14:03:44.995783000 -12878823490035682863594428309200592.648 +1464009198 214808438 hsn nit wrm 1965-10-07 06:17:35.883185000 1963-06-26 10:52:34.771336000 -15647657959685965295526824567175094.556 +1544619708 1646595280 cwi upk kfa 1993-10-31 12:45:56.448074000 1987-11-29 12:21:52.464881000 16972471162843050410303596853509408.621 +-1305125664 -1216519640 mhs lwr grm 1979-03-29 20:57:40.330210000 1987-09-25 08:35:27.030011000 -19952462260065770664644151346360267.866 +32484504 -1216912862 toj jup sny 1941-06-02 12:29:54.455528000 1939-02-19 17:04:53.343679000 19819191194577719991265924956052457.871 +\N 215070586 xje qlw gbm 1969-06-23 12:47:12.928477000 1963-07-22 12:23:08.945284000 -11368815539745691357207039148135779.812 +113095014 1646857428 ito xsn sni 1954-11-19 20:58:56.810613000 1963-05-18 08:36:43.510414000 15514386894197914622148598176867284.562 +1558251404 1646464206 pkv vql oau 1990-02-27 07:01:36.489756000 1987-11-16 11:36:35.377907000 12735167751606026761259877610703467.581 +-1399236796 -1216650714 kfa mxs cni 1945-02-16 18:59:31.500819000 1939-03-17 18:35:27.517627000 2830385681959339963124797410606259.399 +-1318626286 215332734 upk toa oju 1967-01-27 03:09:39.989802000 1939-01-11 14:49:02.082757000 -12837284481332432574318362132967218.944 +126726710 214939512 lwr rmx bvq 1965-10-20 07:02:52.970159000 
1963-07-09 11:37:51.858310000 -15606118950982715006250758390941720.852 +\N 1646726354 gbm yto oje 1993-11-13 13:31:13.535048000 1987-12-12 13:07:09.551855000 17014010171546300699579663029742782.325 +1464140272 -1216388566 qlw pbv kvq 1979-04-11 21:42:57.417184000 1987-10-08 09:20:44.116985000 -19910923251362520375368085170126894.162 +1544750782 -1216781788 xsn nyt wrd 1941-06-15 13:15:11.542502000 1939-03-04 17:50:10.430653000 19860730203280970280541991132285831.575 +-1304994590 215201660 cni upb kfq 1969-07-06 13:32:30.015451000 1938-12-29 14:03:44.995783000 -11327276531042441067930972971902406.108 +32615578 1646988502 mxs cwr wrm 1954-12-02 21:44:13.897587000 1963-05-31 09:22:00.597388000 15555925902901164911424664353100658.266 +113226088 1646595280 toa aup sey 1990-03-12 07:46:53.576730000 1987-11-29 12:21:52.464881000 12776706760309277050535943786936841.285 +\N -1216519640 oje qcw grm 1945-03-01 19:44:48.587793000 1987-09-25 08:35:27.030011000 2871924690662590252400863586839633.103 +1558382478 215463808 yto xse sny 1967-02-09 03:54:57.076776000 1939-01-24 15:34:19.169731000 -12795745472629182285042295956733845.24 +-1399105722 215070586 pbv vqc fau 1965-11-02 07:48:10.057133000 1963-07-22 12:23:08.945284000 -15564579942279464716974692214708347.148 +-1318495212 1646857428 kfq dxs sni 1993-11-26 14:16:30.622022000 1963-05-18 08:36:43.510414000 17055549180249550988855729205976156.029 +126857784 -1216257492 upb tfa oau 1979-04-24 22:28:14.504158000 1987-10-21 10:06:01.203959000 -19869384242659270086092018993893520.458 +1464271346 -1216650714 cwr rdx bvh 1941-06-28 14:00:28.629476000 1939-03-17 18:35:27.517627000 19902269211984220569818057308519205.279 +\N 215332734 grm ytf oju 1969-07-19 14:17:47.102425000 1939-01-11 14:49:02.082757000 -11285737522339190778654906795669032.404 +1544881856 214677364 qcw gbv bvq 1954-12-15 22:29:30.984561000 1963-06-13 10:07:17.684362000 15597464911604415200700730529334031.97 +-1304863516 1646726354 xse eyt gbv 1990-03-25 08:32:10.663704000 1987-12-12 13:07:09.551855000 12818245769012527339812009963170214.989 +32746652 -1216388566 sni ugb kvq 1945-03-14 20:30:05.674767000 1987-10-08 09:20:44.116985000 2913463699365840541676929763073006.807 +113357162 -1217043936 dxs cwi wrd 1967-02-22 04:40:14.163750000 1939-02-06 16:19:36.256705000 -12754206463925931995766229780500471.536 +1558513552 215201660 tfa aug cwi 1965-11-15 08:33:27.144107000 1938-12-29 14:03:44.995783000 -15523040933576214427698626038474973.444 +\N 1646988502 oju hcw wrm 1993-12-09 15:01:47.708996000 1963-05-31 09:22:00.597388000 17097088188952801278131795382209529.733 +-1398974648 1646333132 ytf xje sey 1979-05-07 23:13:31.591132000 1987-11-03 10:51:18.290933000 -19827845233956019796815952817660146.754 +-1318364138 -1216519640 gbv vhc xje 1941-07-11 14:45:45.716450000 1987-09-25 08:35:27.030011000 19943808220687470859094123484752578.983 +126988858 215463808 kvq dxj sny 1969-08-01 15:03:04.189398000 1939-01-24 15:34:19.169731000 -11244198513635940489378840619435658.7 +1464402420 214808438 ugb kfa fau 1954-12-28 23:14:48.071534000 1963-06-26 10:52:34.771336000 15639003920307665489976796705567405.674 +1545012930 1646857428 cwi idx kfa 1990-04-07 09:17:27.750678000 1963-05-18 08:36:43.510414000 12859784777715777629088076139403588.693 +\N -1216257492 wrm ykf oau 1945-03-27 21:15:22.761741000 1987-10-21 10:06:01.203959000 2955002708069090830952995939306380.511 +-1304732442 -1216912862 hcw gbm bvh 1967-03-07 05:25:31.250724000 1939-02-19 17:04:53.343679000 -12712667455222681706490163604267097.832 
+32877726 215332734 xje eyk gbm 1965-11-28 09:18:44.231081000 1939-01-11 14:49:02.082757000 -15481501924872964138422559862241599.74 +113488236 214677364 sny lgb bvq 1993-12-22 15:47:04.795970000 1963-06-13 10:07:17.684362000 17138627197656051567407861558442903.437 +1558644626 1646464206 dxj cni gbv 1979-05-20 23:58:48.678106000 1987-11-16 11:36:35.377907000 -19786306225252769507539886641426773.05 +-1398843574 -1216388566 kfa alg cni 1941-07-24 15:31:02.803424000 1987-10-08 09:20:44.116985000 19985347229390721148370189660985952.687 +\N -1217043936 oau hcn wrd 1969-08-14 15:48:21.276372000 1939-02-06 16:19:36.256705000 -11202659504932690200102774443202284.996 +-1318233064 214939512 ykf oje cwi 1955-01-11 00:00:05.158508000 1963-07-09 11:37:51.858310000 15680542929010915779252862881800779.378 +127119932 1646988502 gbm mhc oje 1990-04-20 10:02:44.837652000 1963-05-31 09:22:00.597388000 12901323786419027918364142315636962.397 +1464533494 1646333132 bvq doj sey 1945-04-09 22:00:39.848715000 1987-11-03 10:51:18.290933000 2996541716772341120229062115539754.215 +1545144004 -1216781788 lgb kfq xje 1967-03-20 06:10:48.337698000 1939-03-04 17:50:10.430653000 -12671128446519431417214097428033724.128 +-1304601368 215463808 cni ido kfq 1965-12-11 10:04:01.318055000 1939-01-24 15:34:19.169731000 -15439962916169713849146493686008226.036 +\N 214808438 wrd pkf fau 1994-01-04 16:32:21.882944000 1963-06-26 10:52:34.771336000 17180166206359301856683927734676277.141 +33008800 1646595280 hcn grm kfa 1979-06-03 00:44:05.765080000 1987-11-29 12:21:52.464881000 -19744767216549519218263820465193399.346 +113619310 -1216257492 oje epk grm 1941-08-06 16:16:19.890398000 1987-10-21 10:06:01.203959000 20026886238093971437646255837219326.391 +1558775700 -1216912862 sey lgr bvh 1969-08-27 16:33:38.363346000 1939-02-19 17:04:53.343679000 -11161120496229439910826708266968911.292 +-1398712500 215070586 doj sni gbm 1955-01-24 00:45:22.245482000 1963-07-22 12:23:08.945284000 15722081937714166068528929058034153.082 +-1318101990 214677364 kfq qlg sni 1990-05-03 10:48:01.924626000 1963-06-13 10:07:17.684362000 12942862795122278207640208491870336.101 +\N 1646464206 fau hsn gbv 1945-04-22 22:45:56.935689000 1987-11-16 11:36:35.377907000 3038080725475591409505128291773127.919 +127251006 -1216650714 pkf oju cni 1967-04-02 06:56:05.424672000 1939-03-17 18:35:27.517627000 -12629589437816181127938031251800350.424 +1464664568 -1217043936 grm mhs oju 1965-12-24 10:49:18.405029000 1939-02-06 16:19:36.256705000 -15398423907466463559870427509774852.332 +1545275078 214939512 bvh toj cwi 1994-01-17 17:17:38.969918000 1963-07-09 11:37:51.858310000 17221705215062552145959993910909650.845 +-1304470294 1646726354 lgr kvq oje 1979-06-16 01:29:22.852054000 1987-12-12 13:07:09.551855000 -19703228207846268928987754288960025.642 +33139874 1646333132 sni ito kvq 1941-08-19 17:01:36.977372000 1987-11-03 10:51:18.290933000 20068425246797221726922322013452700.095 +\N -1216781788 wid pkv xje 1969-09-09 17:18:55.450320000 1939-03-04 17:50:10.430653000 -11119581487526189621550642090735537.588 +113750384 215201660 hsn wrm kfq 1955-02-06 01:30:39.332456000 1938-12-29 14:03:44.995783000 15763620946417416357804995234267526.786 +1558906774 214808438 oju upk wrm 1990-05-16 11:33:19.011600000 1963-06-26 10:52:34.771336000 12984401803825528496916274668103709.805 +-1398581426 1646595280 jey lwr kfa 1945-05-05 23:31:14.022663000 1987-11-29 12:21:52.464881000 3079619734178841698781194468006501.623 +-1317970916 -1216519640 toj sny grm 1967-04-15 07:41:22.511646000 
1987-09-25 08:35:27.030011000 -12588050429112930838661965075566976.72 +127382080 -1216912862 kvq qlw sny 1966-01-06 11:34:35.492003000 1939-02-19 17:04:53.343679000 -15356884898763213270594361333541478.628 +\N 215070586 fal xsn gbm 1994-01-30 18:02:56.056892000 1963-07-22 12:23:08.945284000 17263244223765802435236060087143024.549 +1464795642 1646857428 pkv oau sni 1979-06-29 02:14:39.939028000 1963-05-18 08:36:43.510414000 -19661689199143018639711688112726651.938 +1545406152 1646464206 wrm mxs oau 1941-09-01 17:46:54.064346000 1987-11-16 11:36:35.377907000 20109964255500472016198388189686073.799 +-1304339220 -1216650714 bmh toa cni 1969-09-22 18:04:12.537294000 1939-03-17 18:35:27.517627000 -11078042478822939332274575914502163.884 +33270948 215332734 lwr bvq oju 1955-02-19 02:15:56.419430000 1939-01-11 14:49:02.082757000 15805159955120666647081061410500900.49 +113881458 214939512 sny yto bvq 1990-05-29 12:18:36.098574000 1963-07-09 11:37:51.858310000 13025940812528778786192340844337083.509 +\N 1646726354 nid pbv oje 1945-05-19 00:16:31.109637000 1987-12-12 13:07:09.551855000 3121158742882091988057260644239875.327 +1559037848 -1216388566 xsn wrd kvq 1967-04-28 08:26:39.598620000 1987-10-08 09:20:44.116985000 -12546511420409680549385898899333603.016 +-1398450352 -1216781788 oau upb wrd 1966-01-19 12:19:52.578977000 1939-03-04 17:50:10.430653000 -15315345890059962981318295157308104.924 +-1317839842 215201660 jep cwr kfq 1994-02-12 18:48:13.143865000 1938-12-29 14:03:44.995783000 17304783232469052724512126263376398.253 +127513154 1646988502 toa sey wrm 1979-07-12 02:59:57.026001000 1963-05-31 09:22:00.597388000 -19620150190439768350435621936493278.234 +1464926716 1646595280 bvq qcw sey 1941-09-14 18:32:11.151319000 1987-11-29 12:21:52.464881000 20151503264203722305474454365919447.503 +\N -1216519640 fql xse grm 1969-10-05 18:49:29.624268000 1987-09-25 08:35:27.030011000 -11036503470119689042998509738268790.18 +1545537226 215463808 pbv fau sny 1955-03-04 03:01:13.506404000 1939-01-24 15:34:19.169731000 15846698963823916936357127586734274.194 +-1304208146 215070586 wrd dxs fau 1990-06-11 13:03:53.185548000 1963-07-22 12:23:08.945284000 13067479821232029075468407020570457.213 +33402022 1646857428 rmh tfa sni 1945-06-01 01:01:48.196611000 1963-05-18 08:36:43.510414000 3162697751585342277333326820473249.031 +114012532 -1216257492 cwr bvh oau 1967-05-11 09:11:56.685594000 1987-10-21 10:06:01.203959000 -12504972411706430260109832723100229.312 +1559168922 -1216650714 sey ytf bvh 1966-02-01 13:05:09.665951000 1939-03-17 18:35:27.517627000 -15273806881356712692042228981074731.22 +\N 215332734 nit gbv oju 1994-02-25 19:33:30.230839000 1939-01-11 14:49:02.082757000 17346322241172303013788192439609771.957 +-1398319278 214677364 xse wid bvq 1979-07-25 03:45:14.112975000 1963-06-13 10:07:17.684362000 -19578611181736518061159555760259904.53 +-1317708768 1646726354 fau ugb gbv 1941-09-27 19:17:28.238293000 1987-12-12 13:07:09.551855000 20193042272906972594750520542152821.207 +127644228 -1216388566 jup cwi kvq 1969-10-18 19:34:46.711242000 1987-10-08 09:20:44.116985000 -10994964461416438753722443562035416.476 +1465057790 -1217043936 tfa jey wrd 1955-03-17 03:46:30.593378000 1939-02-06 16:19:36.256705000 15888237972527167225633193762967647.898 +1545668300 215201660 bvh hcw cwi \N 1938-12-29 14:03:44.995783000 13109018829935279364744473196803830.917 +\N 1646988502 vql xje wrm 1990-06-24 13:49:10.272522000 1963-05-31 09:22:00.597388000 3204236760288592566609392996706622.735 +-1304077072 1646333132 gbv fal sey 
1945-06-14 01:47:05.283585000 1987-11-03 10:51:18.290933000 -12463433403003179970833766546866855.608 +33533096 -1216519640 wid dxj xje 1967-05-24 09:57:13.772568000 1987-09-25 08:35:27.030011000 -15232267872653462402766162804841357.516 +114143606 215463808 rmx kfa sny 1966-02-14 13:50:26.752925000 1939-01-24 15:34:19.169731000 17387861249875553303064258615843145.661 +1559299996 214808438 cwi bmh fau 1994-03-10 20:18:47.317813000 1963-06-26 10:52:34.771336000 -19537072173033267771883489584026530.826 +-1398188204 1646857428 jey ykf kfa 1979-08-07 04:30:31.199949000 1963-05-18 08:36:43.510414000 20234581281610222884026586718386194.911 +\N -1216257492 nyt gbm oau 1941-10-10 20:02:45.325267000 1987-10-21 10:06:01.203959000 -10953425452713188464446377385802042.772 +-1317577694 -1216912862 xje nid bvh 1969-10-31 20:20:03.798216000 1939-02-19 17:04:53.343679000 15929776981230417514909259939201021.602 +127775302 215332734 fal lgb gbm 1955-03-30 04:31:47.680352000 1939-01-11 14:49:02.082757000 13150557838638529654020539373037204.621 +1465188864 214677364 aup cni bvq 1990-07-07 14:34:27.359496000 1963-06-13 10:07:17.684362000 3245775768991842855885459172939996.439 +1545799374 1646464206 kfa jep gbv 1945-06-27 02:32:22.370559000 1987-11-16 11:36:35.377907000 -12421894394299929681557700370633481.904 +-1303945998 -1216388566 bmh hcn cni 1967-06-06 10:42:30.859542000 1987-10-08 09:20:44.116985000 -15190728863950212113490096628607983.812 +\N -1217043936 vqc oje wrd 1966-02-27 14:35:43.839899000 1939-02-06 16:19:36.256705000 17429400258578803592340324792076519.365 +33664170 214939512 gbm fql cwi 1994-03-23 21:04:04.404787000 1963-07-09 11:37:51.858310000 -19495533164330017482607423407793157.122 +114274680 1646988502 nid doj oje 1979-08-20 05:15:48.286923000 1963-05-31 09:22:00.597388000 20276120290313473173302652894619568.615 +1559431070 1646333132 rdx kfq sey 1941-10-23 20:48:02.412241000 1987-11-03 10:51:18.290933000 -10911886444009938175170311209568669.068 +-1398057130 -1216781788 cni rmh xje 1969-11-13 21:05:20.885190000 1939-03-04 17:50:10.430653000 15971315989933667804185326115434395.306 +-1317446620 215463808 jep pkf kfq 1955-04-12 05:17:04.767326000 1939-01-24 15:34:19.169731000 13192096847341779943296605549270578.325 +\N 214808438 eyt grm fau 1990-07-20 15:19:44.446470000 1963-06-26 10:52:34.771336000 3287314777695093145161525349173370.143 +127906376 1646595280 oje nit kfa 1945-07-10 03:17:39.457533000 1987-11-29 12:21:52.464881000 -12380355385596679392281634194400108.2 +1465319938 -1216257492 fql lgr grm 1967-06-19 11:27:47.946516000 1987-10-21 10:06:01.203959000 -15149189855246961824214030452374610.108 +1545930448 -1216912862 aug sni bvh 1966-03-12 15:21:00.926873000 1939-02-19 17:04:53.343679000 17470939267282053881616390968309893.069 +-1303814924 215070586 \N jup gbm 1994-04-05 21:49:21.491761000 1963-07-22 12:23:08.945284000 -19453994155626767193331357231559783.418 +33795244 214677364 kfq hsn sni 1979-09-02 06:01:05.373897000 1963-06-13 10:07:17.684362000 20317659299016723462578719070852942.319 +\N 1646464206 rmh oju gbv 1941-11-05 21:33:19.499215000 1987-11-16 11:36:35.377907000 -10870347435306687885894245033335295.364 +114405754 -1216650714 vhc vql cni 1969-11-26 21:50:37.972164000 1939-03-17 18:35:27.517627000 16012854998636918093461392291667769.01 +1559562144 -1217043936 grm toj oju 1955-04-25 06:02:21.854300000 1939-02-06 16:19:36.256705000 13233635856045030232572671725503952.029 +-1397926056 214939512 nit kvq cwi 1990-08-02 16:05:01.533444000 1963-07-09 11:37:51.858310000 
3328853786398343434437591525406743.847 +-1317315546 1646726354 idx rmx oje 1945-07-23 04:02:56.544507000 1987-12-12 13:07:09.551855000 -12338816376893429103005568018166734.496 +128037450 1646333132 sni pkv kvq 1967-07-02 12:13:05.033490000 1987-11-03 10:51:18.290933000 -15107650846543711534937964276141236.404 +\N -1216781788 jup wrm xje 1966-03-25 16:06:18.013847000 1939-03-04 17:50:10.430653000 17512478275985304170892457144543266.773 +1465451012 215201660 eyk nyt kfq 1994-04-18 22:34:38.578735000 1938-12-29 14:03:44.995783000 -19412455146923516904055291055326409.714 +1546061522 214808438 oju lwr wrm 1979-09-15 06:46:22.460871000 1963-06-26 10:52:34.771336000 20359198307719973751854785247086316.023 +-1303683850 1646595280 vql sny kfa 1941-11-18 22:18:36.586189000 1987-11-29 12:21:52.464881000 -10828808426603437596618178857101921.66 +33926318 -1216519640 alg aup grm 1969-12-09 22:35:55.059138000 1987-09-25 08:35:27.030011000 16054394007340168382737458467901142.714 +114536828 -1216912862 kvq xsn sny 1955-05-08 06:47:38.941274000 1939-02-19 17:04:53.343679000 13275174864748280521848737901737325.733 +\N 215070586 rmx oau gbm 1990-08-15 16:50:18.620418000 1963-07-22 12:23:08.945284000 3370392795101593723713657701640117.551 +1559693218 1646857428 mhc vqc sni 1945-08-05 04:48:13.631481000 1963-05-18 08:36:43.510414000 -12297277368190178813729501841933360.792 +-1397794982 1646464206 wrm toa oau 1967-07-15 12:58:22.120464000 1987-11-16 11:36:35.377907000 -15066111837840461245661898099907862.7 +-1317184472 -1216650714 nyt bvq cni 1966-04-07 16:51:35.100821000 1939-03-17 18:35:27.517627000 17554017284688554460168523320776640.477 +128168524 215332734 ido rdx oju 1994-05-01 23:19:55.665709000 1939-01-11 14:49:02.082757000 -19370916138220266614779224879093036.01 +1465582086 214939512 sny pbv bvq 1979-09-28 07:31:39.547845000 1963-07-09 11:37:51.858310000 20400737316423224041130851423319689.727 +\N 1646726354 aup wrd oje 1941-12-01 23:03:53.673163000 1987-12-12 13:07:09.551855000 -10787269417900187307342112680868547.956 +1546192596 -1216388566 epk eyt kvq 1969-12-22 23:21:12.146112000 1987-10-08 09:20:44.116985000 16095933016043418672013524644134516.418 +-1303552776 -1216781788 oau cwr wrd 1955-05-21 07:32:56.028248000 1939-03-04 17:50:10.430653000 13316713873451530811124804077970699.437 +34057392 215201660 vqc sey kfq 1990-08-28 17:35:35.707392000 1938-12-29 14:03:44.995783000 3411931803804844012989723877873491.255 +114667902 1646988502 qlg aug wrm 1945-08-18 05:33:30.718455000 1963-05-31 09:22:00.597388000 -12255738359486928524453435665699987.088 +1559824292 1646595280 bvq xse sey 1967-07-28 13:43:39.207438000 1987-11-29 12:21:52.464881000 -15024572829137210956385831923674488.996 +\N -1216519640 rdx fau grm 1966-04-20 17:36:52.187795000 1987-09-25 08:35:27.030011000 17595556293391804749444589497010014.181 +-1397663908 215463808 mhs vhc sny 1994-05-15 00:05:12.752683000 1939-01-24 15:34:19.169731000 -19329377129517016325503158702859662.306 +-1317053398 215070586 wrd tfa fau 1979-10-11 08:16:56.634819000 1963-07-22 12:23:08.945284000 20442276325126474330406917599553063.431 +128299598 1646857428 eyt bvh sni 1941-12-14 23:49:10.760137000 1963-05-18 08:36:43.510414000 -10745730409196937018066046504635174.252 +1465713160 -1216257492 ito idx oau 1933-06-24 00:08:04.626239000 1987-10-21 10:06:01.203959000 16137472024746668961289590820367890.122 +1546323670 -1216650714 sey gbv bvh 1955-06-03 08:18:13.115222000 1939-03-17 18:35:27.517627000 13358252882154781100400870254204073.141 +\N 215332734 aug wid oju 
1990-09-10 18:20:52.794366000 1939-01-11 14:49:02.082757000 3453470812508094302265790054106864.959 +-1303421702 214677364 upk eyk bvq 1945-08-31 06:18:47.805429000 1963-06-13 10:07:17.684362000 -12214199350783678235177369489466613.384 +34188466 1646726354 fau cwi gbv 1967-08-10 14:28:56.294412000 1987-12-12 13:07:09.551855000 \N +114798976 -1216388566 vhc jey kvq 1966-05-03 18:22:09.274768000 1987-10-08 09:20:44.116985000 -14983033820433960667109765747441115.292 +1559955366 -1217043936 qlw alg wrd 1994-05-28 00:50:29.839657000 1939-02-06 16:19:36.256705000 17637095302095055038720655673243387.885 +-1397532834 215201660 bvh xje cwi 1979-10-24 09:02:13.721793000 1938-12-29 14:03:44.995783000 -19287838120813766036227092526626288.602 +\N 1646988502 idx fal wrm 1941-12-28 00:34:27.847111000 1963-05-31 09:22:00.597388000 20483815333829724619682983775786437.135 +-1316922324 1646333132 mxs mhc sey 1933-07-07 00:53:21.713213000 1987-11-03 10:51:18.290933000 -10704191400493686728789980328401800.548 +128430672 -1216519640 wid kfa xje 1955-06-16 09:03:30.202196000 1987-09-25 08:35:27.030011000 16179011033449919250565656996601263.826 +1465844234 215463808 eyk bmh sny 1990-09-23 19:06:09.881340000 1939-01-24 15:34:19.169731000 13399791890858031389676936430437446.845 +1546454744 214808438 yto ido fau 1945-09-13 07:04:04.892403000 1963-06-26 10:52:34.771336000 3495009821211344591541856230340238.663 +-1303290628 1646857428 jey gbm kfa 1967-08-23 15:14:13.381385000 1963-05-18 08:36:43.510414000 -12172660342080427945901303313233239.68 +\N -1216257492 alg nid oau 1966-05-16 19:07:26.361742000 1987-10-21 10:06:01.203959000 -14941494811730710377833699571207741.588 +34319540 -1216912862 upb epk bvh 1994-06-10 01:35:46.926631000 1939-02-19 17:04:53.343679000 17678634310798305327996721849476761.589 +114930050 215332734 fal cni gbm 1979-11-06 09:47:30.808767000 1939-01-11 14:49:02.082757000 -19246299112110515746951026350392914.898 +1560086440 214677364 mhc jep bvq 1942-01-10 01:19:44.934085000 1963-06-13 10:07:17.684362000 20525354342532974908959049952019810.839 +-1397401760 1646464206 qcw qlg gbv 1933-07-20 01:38:38.800187000 1987-11-16 11:36:35.377907000 -10662652391790436439513914152168426.844 +-1316791250 -1216388566 bmh oje cni 1955-06-29 09:48:47.289170000 1987-10-08 09:20:44.116985000 16220550042153169539841723172834637.53 +\N -1217043936 ido fql wrd 1990-10-06 19:51:26.968314000 1939-02-06 16:19:36.256705000 13441330899561281678953002606670820.549 +128561746 214939512 dxs mhs \N 1945-09-26 07:49:21.979376000 1963-07-09 11:37:51.858310000 3536548829914594880817922406573612.367 +1465975308 1646988502 nid kfq cwi 1967-09-05 15:59:30.468359000 1963-05-31 09:22:00.597388000 -12131121333377177656625237136999865.976 +1546585818 1646333132 epk rmh oje 1966-05-29 19:52:43.448716000 1987-11-03 10:51:18.290933000 -14899955803027460088557633394974367.884 +-1303159554 -1216781788 ytf ito sey 1994-06-23 02:21:04.013605000 1939-03-04 17:50:10.430653000 17720173319501555617272788025710135.293 +34450614 215463808 jep grm xje 1979-11-19 10:32:47.895741000 1939-01-24 15:34:19.169731000 -19204760103407265457674960174159541.194 +\N 214808438 qlg nit kfq 1942-01-23 02:05:02.021059000 1963-06-26 10:52:34.771336000 20566893351236225198235116128253184.543 +115061124 1646595280 ugb upk fau 1933-08-02 02:23:55.887161000 1987-11-29 12:21:52.464881000 -10621113383087186150237847975935053.14 +1560217514 -1216257492 fql sni kfa 1955-07-12 10:34:04.376144000 1987-10-21 10:06:01.203959000 16262089050856419829117789349068011.234 +-1397270686 
-1216912862 mhs jup grm 1990-10-19 20:36:44.055288000 1939-02-19 17:04:53.343679000 13482869908264531968229068782904194.253 +-1316660176 215070586 hcw qlw bvh 1945-10-09 08:34:39.066350000 1963-07-22 12:23:08.945284000 3578087838617845170093988582806986.071 +128692820 214677364 rmh oju gbm 1967-09-18 16:44:47.555333000 1963-06-13 10:07:17.684362000 -12089582324673927367349170960766492.272 +\N 1646464206 ito vql sni 1966-06-11 20:38:00.535690000 1987-11-16 11:36:35.377907000 -14858416794324209799281567218740994.18 +1466106382 -1216650714 dxj mxs gbv 1994-07-06 03:06:21.100579000 1939-03-17 18:35:27.517627000 17761712328204805906548854201943508.997 +1546716892 -1217043936 nit kvq cni 1979-12-02 11:18:04.982715000 1939-02-06 16:19:36.256705000 -19163221094704015168398893997926167.49 +-1303028480 214939512 upk rmx oju 1942-02-05 02:50:19.108033000 1963-07-09 11:37:51.858310000 20608432359939475487511182304486558.247 +34581688 1646726354 ykf yto cwi 1933-08-15 03:09:12.974135000 1987-12-12 13:07:09.551855000 -10579574374383935860961781799701679.436 +115192198 1646333132 jup wrm oje 1955-07-25 11:19:21.463118000 1987-11-03 10:51:18.290933000 16303628059559670118393855525301384.938 +\N -1216781788 qlw nyt kvq 1990-11-01 21:22:01.142262000 1939-03-04 17:50:10.430653000 13524408916967782257505134959137567.957 +1560348588 215201660 lgb upb xje 1945-10-22 09:19:56.153324000 1938-12-29 14:03:44.995783000 3619626847321095459370054759040359.775 +-1397139612 214808438 vql sny kfq 1967-10-01 17:30:04.642307000 1963-06-26 10:52:34.771336000 -12048043315970677078073104784533118.568 +-1316529102 1646595280 mxs aup wrm 1966-06-24 21:23:17.622664000 1987-11-29 12:21:52.464881000 -14816877785620959510005501042507620.476 +128823894 -1216519640 hcn qcw kfa 1994-07-19 03:51:38.187553000 1987-09-25 08:35:27.030011000 17803251336908056195824920378176882.701 +1466237456 -1216912862 rmx oau grm 1979-12-15 12:03:22.069689000 1939-02-19 17:04:53.343679000 -19121682086000764879122827821692793.786 +\N 215070586 yto vqc sny 1942-02-18 03:35:36.195007000 1963-07-22 12:23:08.945284000 20649971368642725776787248480719931.951 +1546847966 1646857428 doj dxs gbm 1933-08-28 03:54:30.061109000 1963-05-18 08:36:43.510414000 -10538035365680685571685715623468305.732 +-1302897406 1646464206 nyt bvq \N 1955-08-07 12:04:38.550092000 1987-11-16 11:36:35.377907000 16345167068262920407669921701534758.642 +34712762 -1216650714 upb rdx sni 1990-11-14 22:07:18.229236000 1939-03-17 18:35:27.517627000 13565947925671032546781201135370941.661 +115323272 215332734 pkf ytf oau 1945-11-04 10:05:13.240298000 1939-01-11 14:49:02.082757000 \N +1560479662 214939512 aup wrd cni 1967-10-14 18:15:21.729281000 1963-07-09 11:37:51.858310000 3661165856024345748646120935273733.479 +\N 1646726354 qcw eyt oju 1966-07-07 22:08:34.709638000 1987-12-12 13:07:09.551855000 -12006504307267426788797038608299744.864 +-1397008538 -1216388566 lgr ugb bvq 1994-08-01 04:36:55.274527000 1987-10-08 09:20:44.116985000 -14775338776917709220729434866274246.772 +-1316398028 -1216781788 vqc sey oje 1979-12-28 12:48:39.156663000 1939-03-04 17:50:10.430653000 17844790345611306485100986554410256.405 +128954968 215201660 dxs aug kvq 1942-03-03 04:20:53.281981000 1938-12-29 14:03:44.995783000 -19080143077297514589846761645459420.082 +1466368530 1646988502 hsn hcw wrd 1933-09-10 04:39:47.148083000 1963-05-31 09:22:00.597388000 20691510377345976066063314656953305.655 +1546979040 1646595280 rdx fau kfq 1955-08-20 12:49:55.637066000 1987-11-29 12:21:52.464881000 
-10496496356977435282409649447234932.028 +\N -1216519640 ytf vhc wrm 1990-11-27 22:52:35.316209000 1987-09-25 08:35:27.030011000 16386706076966170696945987877768132.346 +-1302766332 215463808 toj dxj sey 1945-11-17 10:50:30.327272000 1939-01-24 15:34:19.169731000 13607486934374282836057267311604315.365 +34843836 215070586 eyt bvh grm 1967-10-27 19:00:38.816255000 1963-07-22 12:23:08.945284000 3702704864727596037922187111507107.183 +115454346 1646857428 ugb idx sny 1966-07-20 22:53:51.796612000 1963-05-18 08:36:43.510414000 -11964965298564176499520972432066371.16 +1560610736 -1216257492 pkv ykf fau 1994-08-14 05:22:12.361501000 1987-10-21 10:06:01.203959000 -14733799768214458931453368690040873.068 +-1396877464 -1216650714 aug wid sni 1980-01-10 13:33:56.243637000 1939-03-17 18:35:27.517627000 17886329354314556774377052730643630.109 +\N 215332734 hcw eyk oau 1942-03-16 05:06:10.368955000 1939-01-11 14:49:02.082757000 -19038604068594264300570695469226046.378 +-1316266954 214677364 lwr lgb bvh 1933-09-23 05:25:04.235057000 1963-06-13 10:07:17.684362000 20733049386049226355339380833186679.359 +129086042 1646726354 vhc jey oju 1955-09-02 13:35:12.724040000 1987-12-12 13:07:09.551855000 -10454957348274184993133583271001558.324 +1466499604 -1216388566 dxj alg bvq 1990-12-10 23:37:52.403183000 1987-10-08 09:20:44.116985000 16428245085669420986222054054001506.05 +1547110114 -1217043936 xsn hcn gbv 1945-11-30 11:35:47.414246000 1939-02-06 16:19:36.256705000 13649025943077533125333333487837689.069 +-1302635258 215201660 idx fal kvq 1967-11-09 19:45:55.903229000 1938-12-29 14:03:44.995783000 3744243873430846327198253287740480.887 +\N 1646988502 ykf mhc wrd 1966-08-02 23:39:08.883586000 1963-05-31 09:22:00.597388000 -11923426289860926210244906255832997.456 +34974910 \N toa doj cwi 1994-08-27 06:07:29.448475000 1987-11-03 10:51:18.290933000 -14692260759511208642177302513807499.364 +115585420 1646333132 eyk bmh wrm 1980-01-23 14:19:13.330611000 1987-09-25 08:35:27.030011000 17927868363017807063653118906877003.813 +1560741810 -1216519640 lgb ido sey 1942-03-29 05:51:27.455929000 1939-01-24 15:34:19.169731000 -18997065059891014011294629292992672.674 +-1396746390 215463808 pbv pkf xje 1933-10-06 06:10:21.322031000 1963-06-26 10:52:34.771336000 20774588394752476644615447009420053.063 +-1316135880 214808438 alg nid sny 1955-09-15 14:20:29.811014000 1963-05-18 08:36:43.510414000 -10413418339570934703857517094768184.62 +\N 1646857428 hcn epk fau 1990-12-24 00:23:09.490157000 1987-10-21 10:06:01.203959000 16469784094372671275498120230234879.754 +129217116 -1216257492 cwr lgr kfa 1945-12-13 12:21:04.501220000 1939-02-19 17:04:53.343679000 13690564951780783414609399664071062.773 +1466630678 -1216912862 mhc jep oau 1967-11-22 20:31:12.990203000 1939-01-11 14:49:02.082757000 3785782882134096616474319463973854.591 +1547241188 215332734 doj qlg bvh 1966-08-16 00:24:25.970560000 1963-06-13 10:07:17.684362000 -11881887281157675920968840079599623.752 +-1302504184 214677364 xse hsn gbm 1994-09-09 06:52:46.535449000 1987-11-16 11:36:35.377907000 -14650721750807958352901236337574125.66 +35105984 1646464206 ido fql bvq 1980-02-05 15:04:30.417585000 1987-10-08 09:20:44.116985000 17969407371721057352929185083110377.517 +\N -1216388566 \N mhs gbv 1942-04-11 06:36:44.542903000 1939-02-06 16:19:36.256705000 -18955526051187763722018563116759298.97 +115716494 -1217043936 pkf toj cni 1933-10-19 06:55:38.409005000 1963-07-09 11:37:51.858310000 20816127403455726933891513185653426.767 +1560872884 214939512 tfa rmh wrd 1955-09-28 
15:05:46.897988000 1963-05-31 09:22:00.597388000 -10371879330867684414581450918534810.916 +-1396615316 1646988502 epk ito cwi 1991-01-06 01:08:26.577131000 1987-11-03 10:51:18.290933000 16511323103075921564774186406468253.458 +-1316004806 1646333132 lgr pkv oje 1945-12-26 13:06:21.588194000 1939-03-04 17:50:10.430653000 13732103960484033703885465840304436.477 +129348190 -1216781788 gbv nit sey 1967-12-05 21:16:30.077177000 1939-01-24 15:34:19.169731000 3827321890837346905750385640207228.295 +\N 215463808 qlg upk xje 1966-08-29 01:09:43.057534000 1963-06-26 10:52:34.771336000 -11840348272454425631692773903366250.048 +1466761752 214808438 hsn lwr kfq 1994-09-22 07:38:03.622423000 1987-11-29 12:21:52.464881000 -14609182742104708063625170161340751.956 +1547372262 1646595280 cwi jup fau 1980-02-18 15:49:47.504559000 1987-10-21 10:06:01.203959000 18010946380424307642205251259343751.221 +-1302373110 -1216257492 mhs qlw kfa 1942-04-24 07:22:01.629877000 1939-02-19 17:04:53.343679000 -18913987042484513432742496940525925.266 +35237058 -1216912862 toj xsn grm 1933-11-01 07:40:55.495979000 1963-07-22 12:23:08.945284000 20857666412158977223167579361886800.471 +115847568 215070586 xje vql bvh 1955-10-11 15:51:03.984961000 1963-06-13 10:07:17.684362000 -10330340322164434125305384742301437.212 +\N 214677364 ito mxs gbm 1991-01-19 01:53:43.664105000 1987-11-16 11:36:35.377907000 16552862111779171854050252582701627.162 +1561003958 1646464206 pkv toa sni 1946-01-08 13:51:38.675168000 1939-03-17 18:35:27.517627000 13773642969187283993161532016537810.181 +-1396484242 -1216650714 kfa rmx gbv 1967-12-18 22:01:47.164151000 1939-02-06 16:19:36.256705000 3868860899540597195026451816440601.999 +-1315873732 -1217043936 upk yto cni 1966-09-11 01:55:00.144508000 1963-07-09 11:37:51.858310000 -11798809263751175342416707727132876.344 +129479264 214939512 lwr pbv oju 1994-10-05 08:23:20.709397000 1987-12-12 13:07:09.551855000 -14567643733401457774349103985107378.252 +1466892826 1646726354 gbm nyt cwi 1980-03-02 16:35:04.591533000 1987-11-03 10:51:18.290933000 18052485389127557931481317435577124.925 +\N 1646333132 qlw upb oje 1942-05-07 08:07:18.716851000 1939-03-04 17:50:10.430653000 -18872448033781263143466430764292551.562 +1547503336 -1216781788 xsn cwr kvq 1933-11-14 08:26:12.582953000 1938-12-29 14:03:44.995783000 20899205420862227512443645538120174.175 +-1302242036 215201660 cni aup xje 1955-10-24 16:36:21.071935000 1963-06-26 10:52:34.771336000 -10288801313461183836029318566068063.508 +35368132 214808438 mxs qcw kfq 1991-02-01 02:39:00.751079000 1987-11-29 12:21:52.464881000 16594401120482422143326318758935000.866 +115978642 1646595280 toa xse wrm 1946-01-21 14:36:55.762142000 1987-09-25 08:35:27.030011000 13815181977890534282437598192771183.885 +1561135032 -1216519640 oje vqc kfa 1967-12-31 22:47:04.251125000 1939-02-19 17:04:53.343679000 3910399908243847484302517992673975.703 +\N -1216912862 yto dxs grm 1966-09-24 02:40:17.231482000 1963-07-22 12:23:08.945284000 -11757270255047925053140641550899502.64 +-1396353168 215070586 pbv tfa sny 1994-10-18 09:08:37.796371000 1963-05-18 08:36:43.510414000 -14526104724698207485073037808874004.548 +-1315742658 1646857428 kfq rdx gbm 1980-03-15 17:20:21.678507000 1987-11-16 11:36:35.377907000 18094024397830808220757383611810498.629 +129610338 1646464206 upb ytf sni 1942-05-20 08:52:35.803825000 1939-03-17 18:35:27.517627000 -18830909025078012854190364588059177.858 +1467023900 -1216650714 cwr gbv oau 1933-11-27 09:11:29.669926000 1939-01-11 14:49:02.082757000 
20940744429565477801719711714353547.879 +1547634410 215332734 grm eyt cni 1955-11-06 17:21:38.158909000 1963-07-09 11:37:51.858310000 -10247262304757933546753252389834689.804 +\N 214939512 qcw ugb oju 1991-02-14 03:24:17.838053000 1987-12-12 13:07:09.551855000 16635940129185672432602384935168374.57 +-1302110962 1646726354 xse cwi bvq 1946-02-03 15:22:12.849116000 1987-10-08 09:20:44.116985000 13856720986593784571713664369004557.589 +35499206 -1216388566 sni aug oje 1968-01-13 23:32:21.338099000 1939-03-04 17:50:10.430653000 3951938916947097773578584168907349.407 +116109716 -1216781788 dxs hcw kvq 1966-10-07 03:25:34.318456000 1938-12-29 14:03:44.995783000 -11715731246344674763864575374666128.936 +1561266106 215201660 tfa xje wrd 1994-10-31 09:53:54.883345000 1963-05-31 09:22:00.597388000 -14484565715994957195796971632640630.844 +-1396222094 1646988502 oju vhc kfq 1980-03-28 18:05:38.765481000 1987-11-29 12:21:52.464881000 18135563406534058510033449788043872.333 +\N 1646595280 ytf dxj wrm 1942-06-02 09:37:52.890799000 1987-09-25 08:35:27.030011000 -18789370016374762564914298411825804.154 +-1315611584 -1216519640 gbv kfa sey 1933-12-10 09:56:46.756900000 1939-01-24 15:34:19.169731000 20982283438268728090995777890586921.583 +129741412 215463808 kvq idx grm 1955-11-19 18:06:55.245883000 1963-07-22 12:23:08.945284000 -10205723296054683257477186213601316.1 +1467154974 215070586 ugb ykf sny 1991-02-27 04:09:34.925027000 1963-05-18 08:36:43.510414000 16677479137888922721878451111401748.274 +1547765484 1646857428 cwi gbm fau 1946-02-16 16:07:29.936090000 1987-10-21 10:06:01.203959000 13898259995297034860989730545237931.293 +-1301979888 -1216257492 wrm eyk sni 1968-01-27 00:17:38.425073000 1939-03-17 18:35:27.517627000 3993477925650348062854650345140723.111 +\N -1216650714 hcw lgb oau 1966-10-20 04:10:51.405430000 1939-01-11 14:49:02.082757000 -11674192237641424474588509198432755.232 +35630280 215332734 xje cni bvh 1994-11-13 10:39:11.970319000 1963-06-13 10:07:17.684362000 -14443026707291706906520905456407257.14 +116240790 214677364 sny alg oju 1980-04-10 18:50:55.852455000 1987-12-12 13:07:09.551855000 18177102415237308799309515964277246.037 +1561397180 1646726354 dxj hcn bvq 1942-06-15 10:23:09.977773000 1987-10-08 09:20:44.116985000 -18747831007671512275638232235592430.45 +-1396091020 -1216388566 kfa oje gbv 1933-12-23 10:42:03.843874000 1939-02-06 16:19:36.256705000 21023822446971978380271844066820295.287 +-1315480510 -1217043936 oau mhc kvq 1955-12-02 18:52:12.332857000 1938-12-29 14:03:44.995783000 -10164184287351432968201120037367942.396 +\N 215201660 ykf doj wrd 1991-03-12 04:54:52.012001000 1963-05-31 09:22:00.597388000 16719018146592173011154517287635121.978 +129872486 1646988502 gbm kfq cwi 1946-03-01 16:52:47.023064000 1987-11-03 10:51:18.290933000 13939799004000285150265796721471304.997 +1467286048 1646333132 bvq ido wrm 1968-02-09 01:02:55.512047000 1987-09-25 08:35:27.030011000 4035016934353598352130716521374096.815 +1547896558 -1216519640 lgb pkf sey 1966-11-02 04:56:08.492404000 1939-01-24 15:34:19.169731000 -11632653228938174185312443022199381.528 +-1301848814 215463808 cni grm xje 1994-11-26 11:24:29.057293000 1963-06-26 10:52:34.771336000 -14401487698588456617244839280173883.436 +35761354 214808438 wrd epk sny 1980-04-23 19:36:12.939428000 1963-05-18 08:36:43.510414000 18218641423940559088585582140510619.741 +\N 1646857428 hcn lgr fau 1942-06-28 11:08:27.064746000 1987-10-21 10:06:01.203959000 -18706291998968261986362166059359056.746 +116371864 -1216257492 oje sni kfa 1934-01-05 
11:27:20.930848000 1939-02-19 17:04:53.343679000 21065361455675228669547910243053668.991 +1561528254 -1216912862 sey qlg oau 1955-12-15 19:37:29.419831000 1939-01-11 14:49:02.082757000 -10122645278648182678925053861134568.692 +-1395959946 215332734 doj hsn bvh 1991-03-25 05:40:09.098975000 1963-06-13 10:07:17.684362000 16760557155295423300430583463868495.682 +-1315349436 214677364 kfq oju gbm 1946-03-14 17:38:04.110038000 1987-11-16 11:36:35.377907000 13981338012703535439541862897704678.701 +130003560 1646464206 fau mhs bvq 1968-02-22 01:48:12.599021000 1987-10-08 09:20:44.116985000 4076555943056848641406782697607470.519 +\N -1216388566 pkf toj gbv 1966-11-15 05:41:25.579378000 1939-02-06 16:19:36.256705000 -11591114220234923896036376845966007.824 +1467417122 -1217043936 grm kvq cni 1994-12-09 12:09:46.144266000 1963-07-09 11:37:51.858310000 -14359948689885206327968773103940509.732 +1548027632 214939512 bvh ito wrd 1980-05-06 20:21:30.026402000 1963-05-31 09:22:00.597388000 18260180432643809377861648316743993.445 +-1301717740 1646988502 lgr pkv cwi 1942-07-11 11:53:44.151720000 1987-11-03 10:51:18.290933000 -18664752990265011697086099883125683.042 +35892428 1646333132 sni wrm oje 1934-01-18 12:12:38.017822000 1939-03-04 17:50:10.430653000 21106900464378478958823976419287042.695 +116502938 -1216781788 wid upk sey 1955-12-28 20:22:46.506805000 1939-01-24 15:34:19.169731000 -10081106269944932389648987684901194.988 +\N 215463808 hsn lwr xje 1991-04-07 06:25:26.185949000 1963-06-26 10:52:34.771336000 16802096163998673589706649640101869.386 +1561659328 214808438 oju sny kfq 1946-03-27 18:23:21.197012000 1987-11-29 12:21:52.464881000 14022877021406785728817929073938052.405 +-1395828872 1646595280 jey qlw fau 1968-03-06 02:33:29.685995000 1987-10-21 10:06:01.203959000 4118094951760098930682848873840844.223 +-1315218362 -1216257492 toj xsn kfa 1966-11-28 06:26:42.666352000 1939-02-19 17:04:53.343679000 -11549575211531673606760310669732634.12 +130134634 -1216912862 kvq oau grm 1994-12-22 12:55:03.231240000 1963-07-22 12:23:08.945284000 -14318409681181956038692706927707136.028 +1467548196 215070586 fal mxs bvh 1980-05-19 21:06:47.113376000 1963-06-13 10:07:17.684362000 18301719441347059667137714492977367.149 +\N 214677364 pkv toa gbm 1942-07-24 12:39:01.238694000 1987-11-16 11:36:35.377907000 -18623213981561761407810033706892309.338 +\N 1646464206 wrm bvq sni 1934-01-31 12:57:55.104796000 1939-03-17 18:35:27.517627000 21148439473081729248100042595520416.399 +1548158706 -1216650714 bmh yto gbv 1956-01-10 21:08:03.593779000 1939-02-06 16:19:36.256705000 -10039567261241682100372921508667821.284 +-1301586666 -1217043936 lwr pbv cni 1991-04-20 07:10:43.272923000 1963-07-09 11:37:51.858310000 16843635172701923878982715816335243.09 +36023502 214939512 sny wrd oju 1946-04-09 19:08:38.283986000 1987-12-12 13:07:09.551855000 14064416030110036018093995250171426.109 +116634012 1646726354 nid upb cwi 1968-03-19 03:18:46.772969000 1987-11-03 10:51:18.290933000 4159633960463349219958915050074217.927 +\N 1646333132 xsn cwr oje 1966-12-11 07:11:59.753326000 1939-03-04 17:50:10.430653000 -11508036202828423317484244493499260.416 +1561790402 -1216781788 oau sey kvq 1995-01-04 13:40:20.318214000 1938-12-29 14:03:44.995783000 -14276870672478705749416640751473762.324 +-1395697798 215201660 jep qcw xje 1980-06-01 21:52:04.200350000 1963-06-26 10:52:34.771336000 18343258450050309956413780669210740.853 +-1315087288 214808438 toa xse kfq 1942-08-06 13:24:18.325668000 1987-11-29 12:21:52.464881000 
-18581674972858511118533967530658935.634 +130265708 1646595280 bvq fau wrm 1934-02-13 13:43:12.191770000 1987-09-25 08:35:27.030011000 21189978481784979537376108771753790.103 +1467679270 -1216519640 fql dxs kfa 1956-01-23 21:53:20.680753000 1939-02-19 17:04:53.343679000 -9998028252538431811096855332434447.58 +\N -1216912862 pbv tfa grm 1991-05-03 07:56:00.359897000 1963-07-22 12:23:08.945284000 16885174181405174168258781992568616.794 +1548289780 215070586 wrd bvh sny 1946-04-22 19:53:55.370960000 1963-05-18 08:36:43.510414000 14105955038813286307370061426404799.813 +-1301455592 1646857428 rmh ytf gbm 1968-04-01 04:04:03.859943000 1987-11-16 11:36:35.377907000 4201172969166599509234981226307591.631 +36154576 1646464206 cwr gbv sni 1966-12-24 07:57:16.840300000 1939-03-17 18:35:27.517627000 -11466497194125173028208178317265886.712 +116765086 -1216650714 sey wid oau 1995-01-17 14:25:37.405188000 1939-01-11 14:49:02.082757000 -14235331663775455460140574575240388.62 +1561921476 215332734 nit ugb cni 1980-06-14 22:37:21.287324000 1963-07-09 11:37:51.858310000 18384797458753560245689846845444114.557 +\N 214939512 xse cwi oju 1942-08-19 14:09:35.412642000 1987-12-12 13:07:09.551855000 -18540135964155260829257901354425561.93 +-1395566724 1646726354 fau jey bvq 1934-02-26 14:28:29.278744000 1987-10-08 09:20:44.116985000 21231517490488229826652174947987163.807 +-1314956214 -1216388566 jup hcw oje 1956-02-05 22:38:37.767727000 1939-03-04 17:50:10.430653000 -9956489243835181521820789156201073.876 +130396782 -1216781788 tfa xje kvq 1991-05-16 08:41:17.446871000 1938-12-29 14:03:44.995783000 16926713190108424457534848168801990.498 +1467810344 215201660 bvh fal wrd 1946-05-05 20:39:12.457934000 1963-05-31 09:22:00.597388000 14147494047516536596646127602638173.517 +1548420854 1646988502 vql dxj kfq 1968-04-14 04:49:20.946917000 1987-11-29 12:21:52.464881000 4242711977869849798511047402540965.335 +\N 1646595280 gbv kfa wrm 1967-01-06 08:42:33.927274000 1987-09-25 08:35:27.030011000 -11424958185421922738932112141032513.008 +-1301324518 -1216519640 wid bmh sey 1995-01-30 15:10:54.492162000 1939-01-24 15:34:19.169731000 -14193792655072205170864508399007014.916 +36285650 215463808 rmx ykf grm 1980-06-27 23:22:38.374298000 1963-07-22 12:23:08.945284000 18426336467456810534965913021677488.261 +116896160 215070586 cwi gbm sny 1942-09-01 14:54:52.499616000 1963-05-18 08:36:43.510414000 -18498596955452010539981835178192188.226 +1562052550 1646857428 jey nid fau 1934-03-11 15:13:46.365718000 1987-10-21 10:06:01.203959000 5408566632826149467328159735024.295 +-1395435650 -1216257492 nyt lgb sni 1956-02-18 23:23:54.854701000 1939-03-17 18:35:27.517627000 -9914950235131931232544722979967700.172 +\N -1216650714 xje cni oau 1991-05-29 09:26:34.533845000 1939-01-11 14:49:02.082757000 16968252198811674746810914345035364.202 +-1314825140 215332734 fal jep bvh 1946-05-18 21:24:29.544908000 1963-06-13 10:07:17.684362000 14189033056219786885922193778871547.221 +130527856 214677364 aup hcn oju 1968-04-27 05:34:38.033891000 1987-12-12 13:07:09.551855000 4284250986573100087787113578774339.039 +1467941418 1646726354 kfa oje bvq 1967-01-19 09:27:51.014248000 1987-10-08 09:20:44.116985000 -11383419176718672449656045964799139.304 +1548551928 -1216388566 bmh fql gbv 1995-02-12 15:56:11.579136000 1939-02-06 16:19:36.256705000 -14152253646368954881588442222773641.212 +-1301193444 -1217043936 vqc doj kvq 1980-07-11 00:07:55.461272000 1938-12-29 14:03:44.995783000 18467875476160060824241979197910861.965 +\N 215201660 gbm kfq wrd 1942-09-14 
15:40:09.586590000 1963-05-31 09:22:00.597388000 -18457057946748760250705769001958814.522 +36416724 1646988502 nid rmh cwi 1934-03-24 15:59:03.452692000 1987-11-03 10:51:18.290933000 46947575336076438743394335968397.999 +117027234 1646333132 rdx pkf wrm 1956-03-03 00:09:11.941675000 1987-09-25 08:35:27.030011000 -9873411226428680943268656803734326.468 +1562183624 -1216519640 cni grm sey 1991-06-11 10:11:51.620819000 1939-01-24 15:34:19.169731000 17009791207514925036086980521268737.906 +-1395304576 215463808 jep nit xje 1946-05-31 22:09:46.631882000 1963-06-26 10:52:34.771336000 14230572064923037175198259955104920.925 +-1314694066 214808438 eyt lgr sny 1968-05-10 06:19:55.120865000 1963-05-18 08:36:43.510414000 4325789995276350377063179755007712.743 +\N 1646857428 oje sni fau 1967-02-01 10:13:08.101222000 1987-10-21 10:06:01.203959000 -11341880168015422160379979788565765.6 +130658930 -1216257492 fql jup kfa 1995-02-25 16:41:28.666110000 1939-02-19 17:04:53.343679000 -14110714637665704592312376046540267.508 +1468072492 -1216912862 aug hsn oau 1980-07-24 00:53:12.548246000 1939-01-11 14:49:02.082757000 18509414484863311113518045374144235.669 +1548683002 215332734 kfq oju bvh 1942-09-27 16:25:26.673564000 1963-06-13 10:07:17.684362000 -18415518938045509961429702825725440.818 +-1301062370 214677364 rmh vql gbm 1934-04-06 16:44:20.539666000 1987-11-16 11:36:35.377907000 88486584039326728019460512201771.703 +36547798 1646464206 vhc toj bvq 1956-03-16 00:54:29.028649000 1987-10-08 09:20:44.116985000 -9831872217725430653992590627500952.764 +\N -1216388566 grm kvq gbv 1991-06-24 10:57:08.707793000 1939-02-06 16:19:36.256705000 17051330216218175325363046697502111.61 +117158308 -1217043936 nit rmx cni 1946-06-13 22:55:03.718856000 1963-07-09 11:37:51.858310000 14272111073626287464474326131338294.629 +1562314698 214939512 idx pkv wrd 1968-05-23 07:05:12.207839000 1963-05-31 09:22:00.597388000 4367329003979600666339245931241086.447 +-1395173502 1646988502 sni wrm cwi 1967-02-14 10:58:25.188196000 1987-11-03 10:51:18.290933000 -11300341159312171871103913612332391.896 +-1314562992 1646333132 jup nyt oje 1995-03-10 17:26:45.753084000 1939-03-04 17:50:10.430653000 -14069175628962454303036309870306893.804 +130790004 -1216781788 eyk lwr sey 1980-08-06 01:38:29.635220000 1939-01-24 15:34:19.169731000 18550953493566561402794111550377609.373 +\N 215463808 oju sny xje 1942-10-10 17:10:43.760538000 1963-06-26 10:52:34.771336000 -18373979929342259672153636649492067.114 +1468203566 214808438 vql aup kfq 1934-04-19 17:29:37.626640000 1987-11-29 12:21:52.464881000 130025592742577017295526688435145.407 +1548814076 1646595280 alg xsn fau 1956-03-29 01:39:46.115623000 1987-10-21 10:06:01.203959000 -9790333209022180364716524451267579.06 +-1300931296 -1216257492 kvq oau kfa 1991-07-07 11:42:25.794767000 1939-02-19 17:04:53.343679000 17092869224921425614639112873735485.314 +36678872 -1216912862 rmx vqc grm 1946-06-26 23:40:20.805830000 1963-07-22 12:23:08.945284000 14313650082329537753750392307571668.333 +117289382 215070586 mhc toa bvh 1968-06-05 07:50:29.294813000 1963-06-13 10:07:17.684362000 4408868012682850955615312107474460.151 +\N 214677364 wrm bvq gbm 1967-02-27 11:43:42.275169000 1987-11-16 11:36:35.377907000 -11258802150608921581827847436099018.192 +1562445772 1646464206 nyt rdx sni 1995-03-23 18:12:02.840058000 1939-03-17 18:35:27.517627000 -14027636620259204013760243694073520.1 +-1395042428 -1216650714 ido pbv gbv 1980-08-19 02:23:46.722194000 1939-02-06 16:19:36.256705000 18592492502269811692070177726610983.077 
+-1314431918 -1217043936 sny wrd cni 1942-10-23 17:56:00.847512000 1963-07-09 11:37:51.858310000 -18332440920639009382877570473258693.41 +130921078 214939512 aup eyt oju 1934-05-02 18:14:54.713614000 1987-12-12 13:07:09.551855000 171564601445827306571592864668519.111 +1468334640 1646726354 epk cwr cwi 1956-04-11 02:25:03.202597000 1987-11-03 10:51:18.290933000 -9748794200318930075440458275034205.356 +\N 1646333132 oau sey oje 1991-07-20 12:27:42.881741000 1939-03-04 17:50:10.430653000 17134408233624675903915179049968859.018 +1548945150 -1216781788 vqc aug kvq 1946-07-10 00:25:37.892804000 1938-12-29 14:03:44.995783000 14355189091032788043026458483805042.037 +-1300800222 215201660 qlg xse xje 1968-06-18 08:35:46.381786000 1963-06-26 10:52:34.771336000 4450407021386101244891378283707833.855 +36809946 214808438 bvq fau kfq 1967-03-12 12:28:59.362143000 1987-11-29 12:21:52.464881000 -11217263141905671292551781259865644.488 +117420456 1646595280 rdx vhc wrm 1995-04-05 18:57:19.927032000 1987-09-25 08:35:27.030011000 -13986097611555953724484177517840146.396 +1562576846 -1216519640 mhs tfa kfa 1980-09-01 03:09:03.809168000 1939-02-19 17:04:53.343679000 18634031510973061981346243902844356.781 +\N -1216912862 wrd bvh grm 1942-11-05 18:41:17.934486000 1963-07-22 12:23:08.945284000 -18290901911935759093601504297025319.706 +-1394911354 215070586 eyt idx sny 1934-05-15 19:00:11.800588000 1963-05-18 08:36:43.510414000 213103610149077595847659040901892.815 +-1314300844 1646857428 ito gbv gbm 1956-04-24 03:10:20.289571000 1987-11-16 11:36:35.377907000 -9707255191615679786164392098800831.652 +131052152 1646464206 sey wid sni 1991-08-02 13:12:59.968715000 1939-03-17 18:35:27.517627000 17175947242327926193191245226202232.722 +1468465714 -1216650714 aug eyk oau 1946-07-23 01:10:54.979777000 1939-01-11 14:49:02.082757000 14396728099736038332302524660038415.741 +1549076224 215332734 upk cwi cni 1968-07-01 09:21:03.468760000 1963-07-09 11:37:51.858310000 4491946030089351534167444459941207.559 +\N 214939512 fau jey oju \N 1987-12-12 13:07:09.551855000 -11175724133202421003275715083632270.784 +-1300669148 1646726354 vhc alg bvq 1967-03-25 13:14:16.449117000 1987-10-08 09:20:44.116985000 -13944558602852703435208111341606772.692 +36941020 -1216388566 qlw xje oje 1995-04-18 19:42:37.014006000 1939-03-04 17:50:10.430653000 18675570519676312270622310079077730.485 +117551530 -1216781788 bvh fal kvq 1980-09-14 03:54:20.896142000 1938-12-29 14:03:44.995783000 -18249362903232508804325438120791946.002 +1562707920 215201660 idx mhc wrd 1942-11-18 19:26:35.021460000 1963-05-31 09:22:00.597388000 254642618852327885123725217135266.519 +-1394780280 1646988502 mxs kfa kfq 1934-05-28 19:45:28.887562000 1987-11-29 12:21:52.464881000 -9665716182912429496888325922567457.948 +\N 1646595280 wid bmh wrm 1956-05-07 03:55:37.376545000 1987-09-25 08:35:27.030011000 17217486251031176482467311402435606.426 +-1314169770 -1216519640 eyk ido sey 1991-08-15 13:58:17.055689000 1939-01-24 15:34:19.169731000 14438267108439288621578590836271789.445 +131183226 215463808 yto gbm grm 1946-08-05 01:56:12.066751000 1963-07-22 12:23:08.945284000 4533485038792601823443510636174581.263 +1468596788 215070586 jey nid sny 1968-07-14 10:06:20.555734000 1963-05-18 08:36:43.510414000 -11134185124499170713999648907398897.08 +1549207298 1646857428 alg epk fau 1967-04-07 13:59:33.536091000 1987-10-21 10:06:01.203959000 -13903019594149453145932045165373398.988 +-1300538074 -1216257492 upb cni sni 1995-05-01 20:27:54.100980000 1939-03-17 18:35:27.517627000 
18717109528379562559898376255311104.189 +\N -1216650714 fal jep oau 1980-09-27 04:39:37.983116000 1939-01-11 14:49:02.082757000 -18207823894529258515049371944558572.298 +37072094 215332734 mhc qlg bvh 1942-12-01 20:11:52.108434000 1963-06-13 10:07:17.684362000 296181627555578174399791393368640.223 +117682604 214677364 qcw oje oju 1934-06-10 20:30:45.974536000 1987-12-12 13:07:09.551855000 -9624177174209179207612259746334084.244 +1562838994 1646726354 bmh fql bvq 1956-05-20 04:40:54.463519000 1987-10-08 09:20:44.116985000 17259025259734426771743377578668980.13 +-1394649206 -1216388566 ido mhs gbv 1991-08-28 14:43:34.142663000 1939-02-06 16:19:36.256705000 14479806117142538910854657012505163.149 +-1314038696 -1217043936 dxs kfq kvq 1946-08-18 02:41:29.153725000 1938-12-29 14:03:44.995783000 4575024047495852112719576812407954.967 +\N 215201660 nid rmh wrd 1968-07-27 10:51:37.642708000 1963-05-31 09:22:00.597388000 -11092646115795920424723582731165523.376 +131314300 1646988502 epk ito cwi 1967-04-20 14:44:50.623065000 1987-11-03 10:51:18.290933000 -13861480585446202856655978989140025.284 +1468727862 1646333132 ytf grm wrm 1995-05-14 21:13:11.187954000 1987-09-25 08:35:27.030011000 18758648537082812849174442431544477.893 +1549338372 -1216519640 jep nit sey 1980-10-10 05:24:55.070090000 1939-01-24 15:34:19.169731000 -18166284885826008225773305768325198.594 +-1300407000 215463808 qlg upk xje 1942-12-14 20:57:09.195408000 1963-06-26 10:52:34.771336000 337720636258828463675857569602013.927 +37203168 214808438 ugb sni sny 1934-06-23 21:16:03.061510000 1963-05-18 08:36:43.510414000 -9582638165505928918336193570100710.54 +\N 1646857428 fql jup fau 1956-06-02 05:26:11.550493000 1987-10-21 10:06:01.203959000 17300564268437677061019443754902353.834 +117813678 -1216257492 mhs qlw kfa 1991-09-10 15:28:51.229636000 1939-02-19 17:04:53.343679000 14521345125845789200130723188738536.853 +1562970068 -1216912862 hcw oju oau 1946-08-31 03:26:46.240699000 1939-01-11 14:49:02.082757000 4616563056199102401995642988641328.671 +-1394518132 215332734 rmh vql bvh 1968-08-09 11:36:54.729682000 1963-06-13 10:07:17.684362000 -11051107107092670135447516554932149.672 +-1313907622 214677364 ito mxs gbm 1967-05-03 15:30:07.710039000 1987-11-16 11:36:35.377907000 -13819941576742952567379912812906651.58 +131445374 1646464206 dxj kvq bvq 1995-05-27 21:58:28.274928000 1987-10-08 09:20:44.116985000 18800187545786063138450508607777851.597 +\N -1216388566 nit rmx gbv 1980-10-23 06:10:12.157064000 1939-02-06 16:19:36.256705000 -18124745877122757936497239592091824.89 +1468858936 -1217043936 upk yto cni 1942-12-27 21:42:26.282382000 1963-07-09 11:37:51.858310000 379259644962078752951923745835387.631 +1549469446 214939512 ykf wrm wrd 1934-07-06 22:01:20.148484000 1963-05-31 09:22:00.597388000 -9541099156802678629060127393867336.836 +-1300275926 1646988502 jup nyt cwi 1956-06-15 06:11:28.637467000 1987-11-03 10:51:18.290933000 17342103277140927350295509931135727.538 +37334242 1646333132 qlw upb oje 1991-09-23 16:14:08.316610000 1939-03-04 17:50:10.430653000 14562884134549039489406789364971910.557 +117944752 -1216781788 lgb sny sey 1946-09-13 04:12:03.327673000 1939-01-24 15:34:19.169731000 4658102064902352691271709164874702.375 +\N 215463808 vql aup xje 1968-08-22 12:22:11.816656000 1963-06-26 10:52:34.771336000 -11009568098389419846171450378698775.968 +1563101142 214808438 mxs qcw kfq 1967-05-16 16:15:24.797013000 1987-11-29 12:21:52.464881000 -13778402568039702278103846636673277.876 +-1394387058 1646595280 hcn oau fau 1995-06-09 
22:43:45.361902000 1987-10-21 10:06:01.203959000 18841726554489313427726574784011225.301 +-1313776548 -1216257492 rmx vqc kfa 1980-11-05 06:55:29.244038000 1939-02-19 17:04:53.343679000 -18083206868419507647221173415858451.186 +131576448 -1216912862 yto dxs grm 1943-01-09 22:27:43.369356000 1963-07-22 12:23:08.945284000 420798653665329042227989922068761.335 +1468990010 215070586 doj bvq bvh 1934-07-19 22:46:37.235458000 1963-06-13 10:07:17.684362000 -9499560148099428339784061217633963.132 +\N 214677364 nyt rdx gbm 1956-06-28 06:56:45.724441000 1987-11-16 11:36:35.377907000 17383642285844177639571576107369101.242 +1549600520 1646464206 upb ytf sni 1991-10-06 16:59:25.403584000 1939-03-17 18:35:27.517627000 14604423143252289778682855541205284.261 +-1300144852 -1216650714 pkf wrd gbv 1946-09-26 04:57:20.414647000 1939-02-06 16:19:36.256705000 4699641073605602980547775341108076.079 +37465316 -1217043936 aup eyt cni 1968-09-04 13:07:28.903630000 1963-07-09 11:37:51.858310000 -10968029089686169556895384202465402.264 +118075826 214939512 qcw ugb oju 1967-05-29 17:00:41.883987000 1987-12-12 13:07:09.551855000 -13736863559336451988827780460439904.172 +1563232216 1646726354 lgr sey cwi 1995-06-22 23:29:02.448876000 1987-11-03 10:51:18.290933000 18883265563192563717002640960244599.005 +\N 1646333132 vqc aug oje 1980-11-18 07:40:46.331012000 1939-03-04 17:50:10.430653000 -18041667859716257357945107239625077.482 +-1394255984 -1216781788 dxs hcw kvq 1943-01-22 23:13:00.456330000 1938-12-29 14:03:44.995783000 462337662368579331504056098302135.039 +-1313645474 215201660 hsn fau xje 1934-08-01 23:31:54.322432000 1963-06-26 10:52:34.771336000 -9458021139396178050507995041400589.428 +131707522 214808438 rdx vhc kfq 1956-07-11 07:42:02.811415000 1987-11-29 12:21:52.464881000 17425181294547427928847642283602474.946 +1469121084 1646595280 ytf dxj wrm 1991-10-19 17:44:42.490558000 1987-09-25 08:35:27.030011000 14645962151955540067958921717438657.965 +1549731594 -1216519640 toj bvh kfa 1946-10-09 05:42:37.501621000 1939-02-19 17:04:53.343679000 4741180082308853269823841517341449.783 +\N -1216912862 eyt idx grm 1968-09-17 13:52:45.990604000 1963-07-22 12:23:08.945284000 -10926490080982919267619318026232028.56 +-1300013778 215070586 ugb ykf sny 1967-06-11 17:45:58.970961000 1963-05-18 08:36:43.510414000 -13695324550633201699551714284206530.468 +37596390 1646857428 pkv wid gbm 1995-07-06 00:14:19.535850000 1987-11-16 11:36:35.377907000 18924804571895814006278707136477972.709 +118206900 1646464206 aug eyk sni 1980-12-01 08:26:03.417986000 1939-03-17 18:35:27.517627000 -18000128851013007068669041063391703.778 +1563363290 -1216650714 hcw lgb oau 1943-02-04 23:58:17.543304000 1939-01-11 14:49:02.082757000 503876671071829620780122274535508.743 +-1394124910 215332734 lwr jey cni 1934-08-15 00:17:11.409406000 1963-07-09 11:37:51.858310000 -9416482130692927761231928865167215.724 +\N 214939512 vhc alg oju 1956-07-24 08:27:19.898389000 1987-12-12 13:07:09.551855000 17466720303250678218123708459835848.65 +-1313514400 1646726354 dxj hcn bvq 1991-11-01 18:29:59.577532000 1987-10-08 09:20:44.116985000 14687501160658790357234987893672031.669 +131838596 -1216388566 xsn fal oje 1946-10-22 06:27:54.588595000 1939-03-04 17:50:10.430653000 4782719091012103559099907693574823.487 +1469252158 -1216781788 \N mhc kvq 1968-09-30 14:38:03.077578000 1938-12-29 14:03:44.995783000 -10884951072279668978343251849998654.856 +1549862668 215201660 idx doj wrd 1967-06-24 18:31:16.057935000 1963-05-31 09:22:00.597388000 
-13653785541929951410275648107973156.764 +-1299882704 1646988502 ykf bmh kfq 1995-07-19 00:59:36.622824000 1987-11-29 12:21:52.464881000 18966343580599064295554773312711346.413 +\N 1646595280 toa ido wrm 1980-12-14 09:11:20.504960000 1987-09-25 08:35:27.030011000 -17958589842309756779392974887158330.074 +37727464 -1216519640 eyk pkf sey 1943-02-18 00:43:34.630278000 1939-01-24 15:34:19.169731000 545415679775079910056188450768882.447 +118337974 215463808 lgb nid grm 1934-08-28 01:02:28.496380000 1963-07-22 12:23:08.945284000 -9374943121989677471955862688933842.02 +1563494364 215070586 pbv epk sny 1956-08-06 09:12:36.985362000 1963-05-18 08:36:43.510414000 17508259311953928507399774636069222.354 +-1393993836 1646857428 alg lgr fau 1991-11-14 19:15:16.664506000 1987-10-21 10:06:01.203959000 14729040169362040646511054069905405.373 +-1313383326 -1216257492 hcn jep sni 1946-11-04 07:13:11.675569000 1939-03-17 18:35:27.517627000 4824258099715353848375973869808197.191 +\N -1216650714 cwr qlg oau 1968-10-13 15:23:20.164552000 1939-01-11 14:49:02.082757000 -10843412063576418689067185673765281.152 +131969670 215332734 mhc hsn bvh 1967-07-07 19:16:33.144909000 1963-06-13 10:07:17.684362000 -13612246533226701120999581931739783.06 +1469383232 214677364 doj fql oju 1995-08-01 01:44:53.709798000 1987-12-12 13:07:09.551855000 19007882589302314584830839488944720.117 +1549993742 1646726354 xse mhs bvq 1980-12-27 09:56:37.591934000 \N -17917050833606506490116908710924956.37 +-1299751630 -1216388566 ido toj gbv 1943-03-03 01:28:51.717252000 1987-10-08 09:20:44.116985000 586954688478330199332254627002256.151 +37858538 -1217043936 pkf rmh kvq 1934-09-10 01:47:45.583353000 1939-02-06 16:19:36.256705000 -9333404113286427182679796512700468.316 +\N 215201660 tfa ito wrd 1956-08-19 09:57:54.072336000 1938-12-29 14:03:44.995783000 17549798320657178796675840812302596.058 +118469048 1646988502 epk pkv cwi 1991-11-27 20:00:33.751480000 1963-05-31 09:22:00.597388000 14770579178065290935787120246138779.077 +1563625438 1646333132 lgr nit wrm 1946-11-17 07:58:28.762543000 1987-11-03 10:51:18.290933000 4865797108418604137652040046041570.895 +-1393862762 -1216519640 gbv upk sey 1968-10-26 16:08:37.251526000 1987-09-25 08:35:27.030011000 -10801873054873168399791119497531907.448 +-1313252252 215463808 qlg lwr xje 1967-07-20 20:01:50.231883000 1939-01-24 15:34:19.169731000 -13570707524523450831723515755506409.356 +132100744 214808438 hsn jup sny 1995-08-14 02:30:10.796772000 1963-06-26 10:52:34.771336000 19049421598005564874106905665178093.821 +\N 1646857428 cwi qlw fau 1981-01-09 10:41:54.678908000 1963-05-18 08:36:43.510414000 -17875511824903256200840842534691582.666 +1469514306 -1216257492 mhs xsn kfa 1943-03-16 02:14:08.804226000 1987-10-21 10:06:01.203959000 628493697181580488608320803235629.855 +1550124816 -1216912862 toj vql oau 1934-09-23 02:33:02.670327000 1939-02-19 17:04:53.343679000 -9291865104583176893403730336467094.612 +-1299620556 215332734 xje mxs bvh 1956-09-01 10:43:11.159310000 1939-01-11 14:49:02.082757000 17591337329360429085951906988535969.762 +37989612 214677364 ito toa gbm 1991-12-10 20:45:50.838454000 1963-06-13 10:07:17.684362000 14812118186768541225063186422372152.781 +118600122 1646464206 pkv rmx bvq 1946-11-30 08:43:45.849517000 1987-11-16 11:36:35.377907000 4907336117121854426928106222274944.599 +\N -1216388566 kfa yto gbv 1968-11-08 16:53:54.338500000 1987-10-08 09:20:44.116985000 -10760334046169918110515053321298533.744 +1563756512 -1217043936 upk pbv cni 1967-08-02 20:47:07.318857000 1939-02-06 
16:19:36.256705000 -13529168515820200542447449579273035.652 +-1393731688 214939512 lwr nyt wrd 1995-08-27 03:15:27.883746000 1963-07-09 11:37:51.858310000 19090960606708815163382971841411467.525 +-1313121178 1646988502 gbm upb cwi 1981-01-22 11:27:11.765882000 1963-05-31 09:22:00.597388000 -17833972816200005911564776358458208.962 +132231818 1646333132 qlw cwr oje 1943-03-29 02:59:25.891200000 1987-11-03 10:51:18.290933000 670032705884830777884386979469003.559 +1469645380 -1216781788 xsn aup sey 1934-10-06 03:18:19.757301000 1939-03-04 17:50:10.430653000 -9250326095879926604127664160233720.908 +\N 215463808 cni qcw xje 1956-09-14 11:28:28.246284000 1939-01-24 15:34:19.169731000 17632876338063679375227973164769343.466 +1550255890 214808438 mxs xse kfq 1991-12-23 21:31:07.925428000 1963-06-26 10:52:34.771336000 14853657195471791514339252598605526.485 +-1299489482 1646595280 toa vqc fau \N 1987-11-29 12:21:52.464881000 4948875125825104716204172398508318.303 +38120686 -1216257492 oje dxs kfa 1946-12-13 09:29:02.936491000 1987-10-21 10:06:01.203959000 -10718795037466667821238987145065160.04 +118731196 -1216912862 yto tfa grm 1968-11-21 17:39:11.425474000 1939-02-19 17:04:53.343679000 -13487629507116950253171383403039661.948 +1563887586 215070586 pbv rdx bvh 1967-08-15 21:32:24.405831000 1963-07-22 12:23:08.945284000 19132499615412065452659038017644841.229 +\N 214677364 kfq ytf gbm 1995-09-09 04:00:44.970720000 1963-06-13 10:07:17.684362000 -17792433807496755622288710182224835.258 +-1393600614 1646464206 upb gbv sni 1981-02-04 12:12:28.852856000 1987-11-16 11:36:35.377907000 711571714588081067160453155702377.263 +-1312990104 -1216650714 cwr eyt gbv 1943-04-11 03:44:42.978174000 1939-03-17 18:35:27.517627000 -9208787087176676314851597984000347.204 +132362892 -1217043936 grm ugb cni 1934-10-19 04:03:36.844275000 1939-02-06 16:19:36.256705000 17674415346766929664504039341002717.17 +1469776454 214939512 qcw cwi oju 1956-09-27 12:13:45.333258000 1963-07-09 11:37:51.858310000 14895196204175041803615318774838900.189 +1550386964 1646726354 xse aug cwi 1992-01-05 22:16:25.012402000 1987-12-12 13:07:09.551855000 4990414134528355005480238574741692.007 +\N 1646333132 sni hcw oje 1946-12-26 10:14:20.023465000 1987-11-03 10:51:18.290933000 -10677256028763417531962920968831786.336 +-1299358408 -1216781788 dxs xje kvq 1968-12-04 18:24:28.512448000 1939-03-04 17:50:10.430653000 -13446090498413699963895317226806288.244 +38251760 215201660 tfa vhc xje 1967-08-28 22:17:41.492805000 1938-12-29 14:03:44.995783000 19174038624115315741935104193878214.933 +118862270 214808438 oju dxj kfq 1995-09-22 04:46:02.057694000 1963-06-26 10:52:34.771336000 -17750894798793505333012644005991461.554 +1564018660 1646595280 ytf kfa wrm 1981-02-17 12:57:45.939829000 1987-11-29 12:21:52.464881000 753110723291331356436519331935750.967 +-1393469540 -1216519640 gbv idx kfa 1943-04-24 04:30:00.065147000 1987-09-25 08:35:27.030011000 -9167248078473426025575531807766973.5 +\N -1216912862 kvq ykf grm 1934-11-01 04:48:53.931249000 1939-02-19 17:04:53.343679000 17715954355470179953780105517236090.874 +-1312859030 215070586 ugb gbm sny 1956-10-10 12:59:02.420232000 1963-07-22 12:23:08.945284000 14936735212878292092891384951072273.893 +132493966 1646857428 cwi eyk gbm 1992-01-18 23:01:42.099376000 1963-05-18 08:36:43.510414000 5031953143231605294756304750975065.711 +1469907528 1646464206 wrm lgb sni 1947-01-08 10:59:37.110439000 1987-11-16 11:36:35.377907000 -10635717020060167242686854792598412.632 +1550518038 -1216650714 hcw cni oau 1968-12-17 
19:09:45.599422000 1939-03-17 18:35:27.517627000 -13404551489710449674619251050572914.54 +-1299227334 215332734 xje alg cni 1967-09-10 23:02:58.579779000 1939-01-11 14:49:02.082757000 19215577632818566031211170370111588.637 +\N 214939512 sny hcn oju 1995-10-05 05:31:19.144667000 1963-07-09 11:37:51.858310000 -17709355790090255043736577829758087.85 +38382834 1646726354 dxj oje bvq 1981-03-02 13:43:03.026803000 1987-12-12 13:07:09.551855000 794649731994581645712585508169124.671 +118993344 -1216388566 kfa mhc oje 1943-05-07 05:15:17.152121000 1987-10-08 09:20:44.116985000 -9125709069770175736299465631533599.796 +1564149734 -1216781788 oau doj kvq 1934-11-14 05:34:11.018223000 1939-03-04 17:50:10.430653000 17757493364173430243056171693469464.578 +-1393338466 215201660 ykf kfq wrd 1956-10-23 13:44:19.507206000 1938-12-29 14:03:44.995783000 14978274221581542382167451127305647.597 +-1312727956 1646988502 gbm ido kfq 1992-01-31 23:46:59.186350000 1963-05-31 09:22:00.597388000 5073492151934855584032370927208439.415 +\N 1646595280 bvq pkf wrm 1947-01-21 11:44:54.197413000 1987-11-29 12:21:52.464881000 -10594178011356916953410788616365038.928 +132625040 -1216519640 lgb grm sey 1968-12-30 19:55:02.686396000 \N -13363012481007199385343184874339540.836 +1470038602 215463808 cni epk grm 1967-09-23 23:48:15.666753000 1987-09-25 08:35:27.030011000 19257116641521816320487236546344962.341 +1550649112 215070586 wrd lgr sny 1995-10-18 06:16:36.231641000 1939-01-24 15:34:19.169731000 -17667816781387004754460511653524714.146 +-1299096260 1646857428 hcn sni fau 1981-03-15 14:28:20.113777000 1963-07-22 12:23:08.945284000 836188740697831934988651684402498.375 +38513908 -1216257492 oje \N sni 1943-05-20 06:00:34.239095000 1963-05-18 08:36:43.510414000 -9084170061066925447023399455300226.092 +\N -1216650714 sey qlg oau 1934-11-27 06:19:28.105197000 1987-10-21 10:06:01.203959000 17799032372876680532332237869702838.282 +119124418 215332734 doj hsn bvh 1956-11-05 14:29:36.594180000 1939-03-17 18:35:27.517627000 15019813230284792671443517303539021.301 +1564280808 214677364 kfq oju oju 1992-02-14 00:32:16.273324000 1939-01-11 14:49:02.082757000 5115031160638105873308437103441813.119 +-1393207392 1646726354 fau mhs bvq 1947-02-03 12:30:11.284387000 1963-06-13 10:07:17.684362000 -10552639002653666664134722440131665.224 +-1312596882 -1216388566 pkf toj gbv 1969-01-12 20:40:19.773370000 1987-12-12 13:07:09.551855000 -13321473472303949096067118698106167.132 +132756114 -1217043936 grm kvq kvq 1967-10-07 00:33:32.753727000 1987-10-08 09:20:44.116985000 19298655650225066609763302722578336.045 +\N 215201660 bvh ito wrd 1995-10-31 07:01:53.318615000 1939-02-06 16:19:36.256705000 -17626277772683754465184445477291340.442 +1470169676 1646988502 lgr pkv cwi 1981-03-28 15:13:37.200751000 1938-12-29 14:03:44.995783000 877727749401082224264717860635872.079 +1550780186 1646333132 sni wrm wrm 1943-06-02 06:45:51.326069000 1963-05-31 09:22:00.597388000 -9042631052363675157747333279066852.388 +-1298965186 -1216519640 wid upk sey 1934-12-10 07:04:45.192171000 1987-11-03 10:51:18.290933000 17840571381579930821608304045936211.986 +38644982 215463808 hsn lwr xje 1956-11-18 15:14:53.681154000 1987-09-25 08:35:27.030011000 15061352238988042960719583479772395.005 +119255492 214808438 oju sny sny 1992-02-27 01:17:33.360298000 1939-01-24 15:34:19.169731000 5156570169341356162584503279675186.823 +\N 1646857428 jey qlw fau 1947-02-16 13:15:28.371361000 1963-06-26 10:52:34.771336000 -10511099993950416374858656263898291.52 +1564411882 -1216257492 toj xsn 
kfa 1969-01-25 21:25:36.860344000 1963-05-18 08:36:43.510414000 -13279934463600698806791052521872793.428 +-1393076318 -1216912862 kvq oau oau 1967-10-20 01:18:49.840701000 1987-10-21 10:06:01.203959000 19340194658928316899039368898811709.749 +-1312465808 215332734 fal mxs bvh 1995-11-13 07:47:10.405589000 1939-02-19 17:04:53.343679000 -17584738763980504175908379301057966.738 +132887188 214677364 pkv toa gbm 1981-04-10 15:58:54.287725000 1939-01-11 14:49:02.082757000 919266758104332513540784036869245.783 +1470300750 1646464206 wrm bvq bvq 1943-06-15 07:31:08.413043000 1963-06-13 10:07:17.684362000 -9001092043660424868471267102833478.684 +\N -1216388566 bmh yto gbv 1934-12-23 07:50:02.279145000 1987-11-16 11:36:35.377907000 17882110390283181110884370222169585.69 +1550911260 -1217043936 lwr pbv cni 1956-12-01 16:00:10.768128000 1987-10-08 09:20:44.116985000 15102891247691293249995649656005768.709 +-1298834112 214939512 sny wrd wrd 1992-03-11 02:02:50.447272000 1939-02-06 16:19:36.256705000 5198109178044606451860569455908560.527 +38776056 1646988502 nid upb cwi 1947-03-01 14:00:45.458335000 1963-07-09 11:37:51.858310000 -10469560985247166085582590087664917.816 +119386566 1646333132 xsn cwr oje 1969-02-07 22:10:53.947318000 1963-05-31 09:22:00.597388000 -13238395454897448517514986345639419.724 +1564542956 -1216781788 oau sey sey 1967-11-02 02:04:06.927675000 1987-11-03 10:51:18.290933000 19381733667631567188315435075045083.453 +\N 215463808 jep qcw xje 1995-11-26 08:32:27.492563000 1939-03-04 17:50:10.430653000 -17543199755277253886632313124824593.034 +-1392945244 214808438 toa xse kfq 1981-04-23 16:44:11.374699000 1939-01-24 15:34:19.169731000 960805766807582802816850213102619.487 +-1312334734 1646595280 bvq fau fau 1943-06-28 08:16:25.500017000 1963-06-26 10:52:34.771336000 -8959553034957174579195200926600104.98 +133018262 -1216257492 fql dxs kfa 1935-01-05 08:35:19.366119000 1987-11-29 12:21:52.464881000 17923649398986431400160436398402959.394 +1470431824 -1216912862 pbv tfa grm 1956-12-14 16:45:27.855102000 1987-10-21 10:06:01.203959000 15144430256394543539271715832239142.413 +1551042334 215070586 wrd bvh bvh 1992-03-24 02:48:07.534246000 1939-02-19 17:04:53.343679000 5239648186747856741136635632141934.231 +\N 214677364 rmh ytf gbm 1947-03-14 14:46:02.545309000 1963-07-22 12:23:08.945284000 -10428021976543915796306523911431544.112 +-1298703038 1646464206 cwr gbv sni 1969-02-20 22:56:11.034292000 1963-06-13 10:07:17.684362000 -13196856446194198228238920169406046.02 +38907130 -1216650714 sey wid gbv 1967-11-15 02:49:24.014649000 1987-11-16 11:36:35.377907000 19423272676334817477591501251278457.157 +119517640 -1217043936 nit ugb cni 1995-12-09 09:17:44.579537000 1939-03-17 18:35:27.517627000 -17501660746574003597356246948591219.33 +1564674030 214939512 xse cwi oju 1981-05-06 17:29:28.461673000 1939-02-06 16:19:36.256705000 1002344775510833092092916389335993.191 +-1392814170 1646726354 fau jey cwi 1943-07-11 09:01:42.586991000 1963-07-09 11:37:51.858310000 -8918014026253924289919134750366731.276 +\N 1646333132 jup hcw oje 1935-01-18 09:20:36.453093000 1987-12-12 13:07:09.551855000 17965188407689681689436502574636333.098 +-1312203660 -1216781788 tfa xje kvq 1956-12-27 17:30:44.942076000 1987-11-03 10:51:18.290933000 15185969265097793828547782008472516.117 +133149336 215201660 bvh fal xje 1992-04-06 03:33:24.621220000 1939-03-04 17:50:10.430653000 5281187195451107030412701808375307.935 +1470562898 214808438 vql dxj kfq 1947-03-27 15:31:19.632283000 1938-12-29 14:03:44.995783000 
-10386482967840665507030457735198170.408 +1551173408 1646595280 gbv kfa wrm 1969-03-05 23:41:28.121266000 1963-06-26 10:52:34.771336000 -13155317437490947938962853993172672.316 +-1298571964 -1216519640 wid bmh kfa 1967-11-28 03:34:41.101623000 1987-11-29 12:21:52.464881000 19464811685038067766867567427511830.861 +\N -1216912862 rmx ykf grm 1995-12-22 10:03:01.666511000 1987-09-25 08:35:27.030011000 -17460121737870753308080180772357845.626 +39038204 215070586 cwi gbm sny 1981-05-19 18:14:45.548647000 1939-02-19 17:04:53.343679000 1043883784214083381368982565569366.895 +119648714 1646857428 jey nid gbm 1943-07-24 09:46:59.673965000 1963-07-22 12:23:08.945284000 -8876475017550674000643068574133357.572 +1564805104 1646464206 nyt lgb sni 1935-01-31 10:05:53.540067000 1963-05-18 08:36:43.510414000 18006727416392931978712568750869706.802 +-1392683096 -1216650714 xje cni oau 1957-01-09 18:16:02.029050000 1987-11-16 11:36:35.377907000 15227508273801044117823848184705889.821 +-1312072586 215332734 fal jep cni 1992-04-19 04:18:41.708194000 1939-03-17 18:35:27.517627000 5322726204154357319688767984608681.639 +\N 214939512 aup hcn oju 1947-04-09 16:16:36.719257000 1939-01-11 14:49:02.082757000 -10344943959137415217754391558964796.704 +133280410 1646726354 kfa oje bvq 1969-03-19 00:26:45.208240000 1963-07-09 11:37:51.858310000 -13113778428787697649686787816939298.612 +1470693972 -1216388566 bmh fql oje 1967-12-11 04:19:58.188597000 1987-12-12 13:07:09.551855000 19506350693741318056143633603745204.565 +1551304482 -1216781788 vqc doj kvq 1996-01-04 10:48:18.753485000 1987-10-08 09:20:44.116985000 -17418582729167503018804114596124471.922 +-1298440890 215201660 gbm kfq wrd 1981-06-01 19:00:02.635621000 1939-03-04 17:50:10.430653000 1085422792917333670645048741802740.599 +39169278 1646988502 nid rmh kfq 1943-08-06 10:32:16.760939000 1938-12-29 14:03:44.995783000 -8834936008847423711367002397899983.868 +\N 1646595280 rdx pkf wrm 1935-02-13 10:51:10.627041000 1963-05-31 09:22:00.597388000 18048266425096182267988634927103080.506 +119779788 -1216519640 cni grm sey 1957-01-22 19:01:19.116024000 1987-11-29 12:21:52.464881000 15269047282504294407099914360939263.525 +1564936178 215463808 jep nit grm 1992-05-02 05:03:58.795168000 1987-09-25 08:35:27.030011000 5364265212857607608964834160842055.343 +-1392552022 215070586 eyt lgr sny 1947-04-22 17:01:53.806231000 1939-01-24 15:34:19.169731000 -10303404950434164928478325382731423 +-1311941512 1646857428 oje sni fau 1969-04-01 01:12:02.295213000 1963-07-22 12:23:08.945284000 -13072239420084447360410721640705924.908 +133411484 -1216257492 fql jup sni 1967-12-24 05:05:15.275570000 1963-05-18 08:36:43.510414000 19547889702444568345419699779978578.269 +\N -1216650714 aug hsn oau 1996-01-17 11:33:35.840459000 1987-10-21 10:06:01.203959000 -17377043720464252729528048419891098.218 +1470825046 215332734 kfq oju bvh 1981-06-14 19:45:19.722595000 1939-03-17 18:35:27.517627000 1126961801620583959921114918036114.303 +1551435556 214677364 rmh vql oju 1943-08-19 11:17:33.847913000 1939-01-11 14:49:02.082757000 -8793397000144173422090936221666610.164 +-1298309816 1646726354 vhc toj bvq 1935-02-26 11:36:27.714015000 1963-06-13 10:07:17.684362000 18089805433799432557264701103336454.21 +39300352 -1216388566 grm kvq gbv 1957-02-04 19:46:36.202998000 1987-12-12 13:07:09.551855000 15310586291207544696375980537172637.229 +119910862 -1217043936 nit rmx kvq 1992-05-15 05:49:15.882142000 1987-10-08 09:20:44.116985000 5405804221560857898240900337075429.047 +\N 215201660 idx pkv wrd 1947-05-05 
17:47:10.893205000 1939-02-06 16:19:36.256705000 -10261865941730914639202259206498049.296 +1565067252 1646988502 sni wrm cwi 1969-04-14 01:57:19.382187000 1938-12-29 14:03:44.995783000 -13030700411381197071134655464472551.204 +-1392420948 1646333132 jup nyt wrm 1968-01-06 05:50:32.362544000 1963-05-31 09:22:00.597388000 19589428711147818634695765956211951.973 +-1311810438 -1216519640 eyk lwr sey 1996-01-30 12:18:52.927433000 1987-11-03 10:51:18.290933000 -17335504711761002440251982243657724.514 +133542558 215463808 oju sny xje 1981-06-27 20:30:36.809569000 1987-09-25 08:35:27.030011000 1168500810323834249197181094269488.007 +1470956120 214808438 vql aup sny 1943-09-01 12:02:50.934887000 1939-01-24 15:34:19.169731000 -8751857991440923132814870045433236.46 +\N 1646857428 alg xsn fau 1935-03-11 12:21:44.800989000 1963-06-26 10:52:34.771336000 18131344442502682846540767279569827.914 +1551566630 -1216257492 kvq oau kfa 1957-02-17 20:31:53.289972000 1963-05-18 08:36:43.510414000 15352125299910794985652046713406010.933 +-1298178742 -1216912862 rmx vqc oau 1992-05-28 06:34:32.969116000 1987-10-21 10:06:01.203959000 5447343230264108187516966513308802.751 +39431426 215332734 mhc toa bvh 1947-05-18 18:32:27.980178000 1939-02-19 17:04:53.343679000 -10220326933027664349926193030264675.592 +120041936 214677364 wrm bvq gbm 1969-04-27 02:42:36.469161000 1939-01-11 14:49:02.082757000 -12989161402677946781858589288239177.5 +1565198326 1646464206 nyt rdx bvq 1968-01-19 06:35:49.449518000 1963-06-13 10:07:17.684362000 19630967719851068923971832132445325.677 +\N -1216388566 ido pbv gbv 1996-02-12 13:04:10.014407000 1987-11-16 11:36:35.377907000 -17293965703057752150975916067424350.81 +-1392289874 -1217043936 sny wrd cni 1981-07-10 21:15:53.896543000 1987-10-08 09:20:44.116985000 1210039819027084538473247270502861.711 +-1311679364 214939512 aup eyt wrd 1943-09-14 12:48:08.021861000 1939-02-06 16:19:36.256705000 -8710318982737672843538803869199862.756 +133673632 1646988502 epk cwr cwi 1935-03-24 13:07:01.887963000 1963-07-09 11:37:51.858310000 18172883451205933135816833455803201.618 +1471087194 1646333132 oau sey oje 1957-03-02 21:17:10.376946000 1963-05-31 09:22:00.597388000 15393664308614045274928112889639384.637 +1551697704 -1216781788 vqc aug sey 1992-06-10 07:19:50.056090000 1987-11-03 10:51:18.290933000 -14457305820759193856084943006068793.4 +\N 215463808 qlg xse xje 1947-05-31 19:17:45.067152000 1939-03-04 17:50:10.430653000 -10178787924324414060650126854031301.888 +-1298047668 214808438 bvq fau kfq 1969-05-10 03:27:53.556135000 1939-01-24 15:34:19.169731000 -12947622393974696492582523112005803.796 +39562500 1646595280 rdx vhc fau 1968-02-01 07:21:06.536492000 1963-06-26 10:52:34.771336000 -263296658230627690673753078742955.402 +120173010 -1216257492 mhs tfa kfa 1996-02-25 13:49:27.101381000 1987-11-29 12:21:52.464881000 -17252426694354501861699849891190977.106 +1565329400 -1216912862 wrd bvh grm 1981-07-23 22:01:10.983517000 1987-10-21 10:06:01.203959000 1251578827730334827749313446736235.415 +-1392158800 215070586 eyt idx bvh 1943-09-27 13:33:25.108835000 1939-02-19 17:04:53.343679000 13935904563474403629658083479999083.81 +\N 214677364 ito gbv gbm 1935-04-06 13:52:18.974937000 1963-07-22 12:23:08.945284000 18214422459909183425092899632036575.322 +-1311548290 1646464206 sey wid sni 1957-03-15 22:02:27.463920000 1963-06-13 10:07:17.684362000 15435203317317295564204179065872758.341 +133804706 -1216650714 aug eyk gbv 1992-06-23 08:05:07.143064000 1987-11-16 11:36:35.377907000 
-14415766812055943566808876829835419.696 +1471218268 -1217043936 upk cwi cni 1947-06-13 20:03:02.154126000 1939-03-17 18:35:27.517627000 -10137248915621163771374060677797928.184 +1551828778 214939512 fau jey oju 1969-05-23 04:13:10.643109000 1939-02-06 16:19:36.256705000 -12906083385271446203306456935772430.092 +-1297916594 1646726354 vhc alg cwi 1968-02-14 08:06:23.623466000 1963-07-09 11:37:51.858310000 -221757649527377401397686902509581.698 +\N 1646333132 qlw xje oje 1996-03-09 14:34:44.188355000 1987-12-12 13:07:09.551855000 -17210887685651251572423783714957603.402 +39693574 -1216781788 bvh fal kvq 1981-08-05 22:46:28.070491000 1987-11-03 10:51:18.290933000 1293117836433585117025379622969609.119 +120304084 215201660 idx mhc xje 1943-10-10 14:18:42.195809000 1939-03-04 17:50:10.430653000 13977443572177653918934149656232457.514 +1565460474 214808438 mxs kfa kfq 1935-04-19 14:37:36.061911000 1938-12-29 14:03:44.995783000 18255961468612433714368965808269949.026 +-1392027726 1646595280 wid bmh wrm 1957-03-28 22:47:44.550894000 1963-06-26 10:52:34.771336000 15476742326020545853480245242106132.045 +-1311417216 -1216519640 eyk ido kfa 1992-07-06 08:50:24.230037000 1987-11-29 12:21:52.464881000 -14374227803352693277532810653602045.992 +\N -1216912862 yto gbm grm 1947-06-26 20:48:19.241100000 1987-09-25 08:35:27.030011000 -10095709906917913482097994501564554.48 +133935780 215070586 jey nid sny 1969-06-05 04:58:27.730083000 1939-02-19 17:04:53.343679000 -12864544376568195914030390759539056.388 +1471349342 1646857428 alg epk \N 1968-02-27 08:51:40.710440000 1963-07-22 12:23:08.945284000 -180218640824127112121620726276207.994 +1551959852 1646464206 upb cni gbm 1996-03-22 15:20:01.275329000 1963-05-18 08:36:43.510414000 -17169348676948001283147717538724229.698 +-1297785520 -1216650714 fal jep sni 1981-08-18 23:31:45.157465000 1987-11-16 11:36:35.377907000 1334656845136835406301445799202982.823 +39824648 215332734 mhc qlg oau 1943-10-23 15:03:59.282783000 1939-03-17 18:35:27.517627000 14018982580880904208210215832465831.218 +\N 214939512 qcw oje cni 1935-05-02 15:22:53.148885000 1939-01-11 14:49:02.082757000 18297500477315684003645031984503322.73 +120435158 1646726354 bmh fql oju 1957-04-10 23:33:01.637868000 1963-07-09 11:37:51.858310000 15518281334723796142756311418339505.749 +1565591548 -1216388566 ido mhs bvq 1992-07-19 09:35:41.317011000 1987-12-12 13:07:09.551855000 -14332688794649442988256744477368672.288 +-1391896652 -1216781788 dxs kfq oje 1947-07-09 21:33:36.328074000 1987-10-08 09:20:44.116985000 -10054170898214663192821928325331180.776 +-1311286142 215201660 nid rmh kvq 1969-06-18 05:43:44.817057000 1939-03-04 17:50:10.430653000 -12823005367864945624754324583305682.684 +134066854 1646988502 epk ito wrd 1968-03-11 09:36:57.797414000 1938-12-29 14:03:44.995783000 -138679632120876822845554550042834.29 +\N 1646595280 ytf grm kfq 1996-04-04 16:05:18.362303000 1963-05-31 09:22:00.597388000 -17127809668244750993871651362490855.994 +1471480416 -1216519640 jep nit wrm 1981-09-01 00:17:02.244439000 1987-11-29 12:21:52.464881000 1376195853840085695577511975436356.527 +1552090926 215463808 qlg upk sey 1943-11-05 15:49:16.369757000 1987-09-25 08:35:27.030011000 14060521589584154497486282008699204.922 +-1297654446 215070586 ugb sni grm 1935-05-15 16:08:10.235859000 1939-01-24 15:34:19.169731000 18339039486018934292921098160736696.434 +39955722 1646857428 fql jup sny 1957-04-24 00:18:18.724842000 1963-07-22 12:23:08.945284000 15559820343427046432032377594572879.453 +120566232 -1216257492 mhs qlw fau 
1992-08-01 10:20:58.403985000 1963-05-18 08:36:43.510414000 -14291149785946192698980678301135298.584 +\N -1216650714 hcw oju sni 1947-07-22 22:18:53.415048000 1987-10-21 10:06:01.203959000 -10012631889511412903545862149097807.072 +1565722622 215332734 rmh vql oau 1969-07-01 06:29:01.904031000 1939-03-17 18:35:27.517627000 -12781466359161695335478258407072308.98 +-1391765578 214677364 ito mxs bvh 1968-03-24 10:22:14.884388000 1939-01-11 14:49:02.082757000 -97140623417626533569488373809460.586 +-1311155068 1646726354 dxj kvq oju 1996-04-17 16:50:35.449277000 1963-06-13 10:07:17.684362000 -17086270659541500704595585186257482.29 +134197928 -1216388566 nit rmx bvq 1981-09-14 01:02:19.331413000 1987-12-12 13:07:09.551855000 1417734862543335984853578151669730.231 +1471611490 -1217043936 upk yto gbv 1943-11-18 16:34:33.456731000 1987-10-08 09:20:44.116985000 14102060598287404786762348184932578.626 +\N 215201660 ykf wrm kvq 1935-05-28 16:53:27.322833000 1939-02-06 16:19:36.256705000 18380578494722184582197164336970070.138 +1552222000 1646988502 jup nyt wrd 1957-05-07 01:03:35.811816000 1938-12-29 14:03:44.995783000 15601359352130296721308443770806253.157 +-1297523372 1646333132 qlw upb cwi 1992-08-14 11:06:15.490959000 1963-05-31 09:22:00.597388000 -14249610777242942409704612124901924.88 +40086796 -1216519640 lgb sny wrm 1947-08-04 23:04:10.502022000 1987-11-03 10:51:18.290933000 -9971092880808162614269795972864433.368 +120697306 215463808 vql aup sey 1969-07-14 07:14:18.991005000 1987-09-25 08:35:27.030011000 -12739927350458445046202192230838935.276 +1565853696 214808438 mxs qcw xje 1968-04-06 11:07:31.971362000 1939-01-24 15:34:19.169731000 -55601614714376244293422197576086.882 +\N 1646857428 hcn oau sny 1996-04-30 17:35:52.536251000 1963-06-26 10:52:34.771336000 -17044731650838250415319519010024108.586 +-1391634504 -1216257492 rmx vqc fau 1981-09-27 01:47:36.418387000 1963-05-18 08:36:43.510414000 1459273871246586274129644327903103.935 +-1311023994 -1216912862 yto dxs kfa 1943-12-01 17:19:50.543705000 1987-10-21 10:06:01.203959000 14143599606990655076038414361165952.33 +134329002 215332734 doj bvq oau 1935-06-10 17:38:44.409807000 1939-02-19 17:04:53.343679000 18422117503425434871473230513203443.842 +1471742564 214677364 nyt rdx bvh 1957-05-20 01:48:52.898790000 1939-01-11 14:49:02.082757000 15642898360833547010584509947039626.861 +1552353074 1646464206 upb ytf gbm 1992-08-27 11:51:32.577933000 1963-06-13 10:07:17.684362000 -14208071768539692120428545948668551.176 +\N -1216388566 pkf wrd bvq 1947-08-17 23:49:27.588996000 1987-11-16 11:36:35.377907000 -9929553872104912324993729796631059.664 +-1297392298 -1217043936 aup eyt gbv 1969-07-27 07:59:36.077979000 1987-10-08 09:20:44.116985000 -12698388341755194756926126054605561.572 +40217870 214939512 qcw ugb cni 1968-04-19 11:52:49.058336000 1939-02-06 16:19:36.256705000 -14062606011125955017356021342713.178 +120828380 1646988502 lgr sey wrd 1996-05-13 18:21:09.623225000 1963-07-09 11:37:51.858310000 -17003192642135000126043452833790734.882 +1565984770 1646333132 vqc aug cwi 1981-10-10 02:32:53.505361000 1963-05-31 09:22:00.597388000 1500812879949836563405710504136477.639 +-1391503430 -1216781788 dxs hcw oje 1943-12-14 18:05:07.630679000 1987-11-03 10:51:18.290933000 14185138615693905365314480537399326.034 +\N 215463808 hsn fau sey 1935-06-23 18:24:01.496781000 1939-03-04 17:50:10.430653000 18463656512128685160749296689436817.546 +-1310892920 214808438 rdx vhc xje 1957-06-02 02:34:09.985763000 1939-01-24 15:34:19.169731000 
15684437369536797299860576123273000.565 +134460076 1646595280 ytf dxj kfq 1992-09-09 12:36:49.664907000 1963-06-26 10:52:34.771336000 -14166532759836441831152479772435177.472 +1471873638 -1216257492 toj bvh fau 1947-08-31 00:34:44.675970000 1987-11-29 12:21:52.464881000 -9888014863401662035717663620397685.96 +1552484148 -1216912862 eyt idx kfa 1946-05-24 04:27:57.656327000 1987-10-21 10:06:01.203959000 -12656849333051944467650059878372187.868 +-1297261224 215070586 ugb ykf grm 1968-05-02 12:38:06.145310000 1939-02-19 17:04:53.343679000 -21240171529866529632202202809594852.69 +\N 214677364 pkv wid bvh 1996-05-26 19:06:26.710199000 1963-07-22 12:23:08.945284000 -16961653633431749836767386657557361.178 +40348944 1646464206 aug eyk gbm 1958-07-31 10:38:40.835517000 1963-06-13 10:07:17.684362000 1542351888653086852681776680369851.343 +120959454 -1216650714 hcw lgb sni 1943-12-27 18:50:24.717653000 1987-11-16 11:36:35.377907000 14226677624397155654590546713632699.738 +1566115844 -1217043936 lwr jey gbv 1935-07-06 19:09:18.583754000 1939-03-17 18:35:27.517627000 18505195520831935450025362865670191.25 +-1391372356 214939512 vhc alg cni 1970-10-14 05:11:58.262898000 1939-02-06 16:19:36.256705000 15725976378240047589136642299506374.269 +-1310761846 1646726354 dxj hcn oju 1992-09-22 13:22:06.751881000 1963-07-09 11:37:51.858310000 -14124993751133191541876413596201803.768 +\N 1646333132 xsn fal cwi 1947-09-13 01:20:01.762944000 1987-12-12 13:07:09.551855000 -9846475854698411746441597444164312.256 +134591150 -1216781788 idx mhc oje 1946-06-06 05:13:14.743301000 1987-11-03 10:51:18.290933000 -12615310324348694178373993702138814.164 +1472004712 215201660 ykf doj kvq 1968-05-15 13:23:23.232284000 1939-03-04 17:50:10.430653000 -21198632521163279342926136633361478.986 +1552615222 214808438 toa bmh xje 1996-06-08 19:51:43.797173000 1938-12-29 14:03:44.995783000 -16920114624728499547491320481323987.474 +-1297130150 1646595280 eyk ido kfq 1958-08-13 11:23:57.922491000 1963-06-26 10:52:34.771336000 1583890897356337141957842856603225.047 +40480018 -1216519640 lgb pkf wrm 1944-01-09 19:35:41.804627000 1987-11-29 12:21:52.464881000 14268216633100405943866612889866073.442 +\N -1216912862 pbv nid kfa 1935-07-19 19:54:35.670728000 1987-09-25 08:35:27.030011000 18546734529535185739301429041903564.954 +121090528 215070586 alg epk grm 1970-10-27 05:57:15.349872000 1939-02-19 17:04:53.343679000 15767515386943297878412708475739747.973 +1566246918 1646857428 hcn lgr sny 1992-10-05 14:07:23.838855000 1963-07-22 12:23:08.945284000 -14083454742429941252600347419968430.064 +-1391241282 1646464206 cwr jep gbm 1947-09-26 02:05:18.849918000 1963-05-18 08:36:43.510414000 -9804936845995161457165531267930938.552 +-1310630772 -1216650714 mhc qlg sni 1946-06-19 05:58:31.830275000 1987-11-16 11:36:35.377907000 -12573771315645443889097927525905440.46 +134722224 215332734 doj hsn oau 1968-05-28 14:08:40.319258000 1939-03-17 18:35:27.517627000 -21157093512460029053650070457128105.282 +\N 214939512 xse fql cni 1996-06-21 20:37:00.884147000 1939-01-11 14:49:02.082757000 -16878575616025249258215254305090613.77 +1472135786 1646726354 ido mhs oju 1958-08-26 12:09:15.009465000 1963-07-09 11:37:51.858310000 1625429906059587431233909032836598.751 +1552746296 -1216388566 pkf toj bvq 1944-01-22 20:20:58.891601000 1987-12-12 13:07:09.551855000 14309755641803656233142679066099447.146 +-1296999076 -1216781788 tfa rmh oje 1935-08-01 20:39:52.757702000 1987-10-08 09:20:44.116985000 18588273538238436028577495218136938.658 +40611092 215201660 epk ito kvq 
1970-11-09 06:42:32.436846000 1939-03-04 17:50:10.430653000 15809054395646548167688774651973121.677 +121221602 1646988502 lgr pkv wrd 1992-10-18 14:52:40.925829000 1938-12-29 14:03:44.995783000 -14041915733726690963324281243735056.36 +\N 1646595280 gbv nit kfq 1947-10-09 02:50:35.936892000 1963-05-31 09:22:00.597388000 -9763397837291911167889465091697564.848 +1566377992 -1216519640 qlg upk wrm 1946-07-02 06:43:48.917249000 1987-11-29 12:21:52.464881000 -12532232306942193599821861349672066.756 +-1391110208 215463808 hsn lwr sey 1968-06-10 14:53:57.406232000 1987-09-25 08:35:27.030011000 -21115554503756778764374004280894731.578 +-1310499698 215070586 cwi jup grm 1996-07-04 21:22:17.971121000 1939-01-24 15:34:19.169731000 -16837036607321998968939188128857240.066 +134853298 1646857428 mhs qlw sny 1958-09-08 12:54:32.096439000 1963-07-22 12:23:08.945284000 1666968914762837720509975209069972.455 +1472266860 -1216257492 toj xsn fau 1944-02-04 21:06:15.978575000 1963-05-18 08:36:43.510414000 14351294650506906522418745242332820.85 +\N -1216650714 xje vql sni 1935-08-14 21:25:09.844676000 1987-10-21 10:06:01.203959000 18629812546941686317853561394370312.362 +1552877370 215332734 ito mxs oau 1970-11-22 07:27:49.523820000 1939-03-17 18:35:27.517627000 15850593404349798456964840828206495.381 +-1296868002 214677364 pkv toa bvh 1992-10-31 15:37:58.012803000 1939-01-11 14:49:02.082757000 -14000376725023440674048215067501682.656 +40742166 1646726354 kfa rmx oju 1947-10-22 03:35:53.023866000 1963-06-13 10:07:17.684362000 -9721858828588660878613398915464191.144 +121352676 -1216388566 upk yto bvq 1946-07-15 07:29:06.004223000 1987-12-12 13:07:09.551855000 -12490693298238943310545795173438693.052 +1566509066 -1217043936 lwr pbv gbv 1968-06-23 15:39:14.493206000 1987-10-08 09:20:44.116985000 -21074015495053528475097938104661357.874 +\N 215201660 gbm nyt kvq 1996-07-17 22:07:35.058095000 1939-02-06 16:19:36.256705000 -16795497598618748679663121952623866.362 +-1390979134 1646988502 qlw upb wrd 1958-09-21 13:39:49.183413000 1938-12-29 14:03:44.995783000 1708507923466088009786041385303346.159 +-1310368624 1646333132 xsn cwr cwi 1944-02-17 21:51:33.065548000 1963-05-31 09:22:00.597388000 14392833659210156811694811418566194.554 +134984372 -1216519640 cni aup wrm 1935-08-27 22:10:26.931650000 1987-11-03 10:51:18.290933000 18671351555644936607129627570603686.066 +1472397934 215463808 mxs qcw sey 1970-12-05 08:13:06.610794000 1987-09-25 08:35:27.030011000 15892132413053048746240907004439869.085 +1553008444 214808438 toa xse xje 1992-11-13 16:23:15.099777000 1939-01-24 15:34:19.169731000 -13958837716320190384772148891268308.952 +\N 1646857428 oje vqc sny 1947-11-04 04:21:10.110840000 1963-06-26 10:52:34.771336000 -9680319819885410589337332739230817.44 +-1296736928 -1216257492 yto dxs fau 1946-07-28 08:14:23.091197000 1963-05-18 08:36:43.510414000 -12449154289535693021269728997205319.348 +40873240 -1216912862 pbv tfa kfa 1968-07-06 16:24:31.580180000 1987-10-21 10:06:01.203959000 -21032476486350278185821871928427984.17 +121483750 215332734 kfq rdx oau 1996-07-30 22:52:52.145068000 1939-02-19 17:04:53.343679000 -16753958589915498390387055776390492.658 +1566640140 214677364 upb ytf bvh 1958-10-04 14:25:06.270386000 1939-01-11 14:49:02.082757000 1750046932169338299062107561536719.863 +-1390848060 1646464206 \N gbv gbm 1944-03-01 22:36:50.152522000 1963-06-13 10:07:17.684362000 14434372667913407100970877594799568.258 +\N -1216388566 cwr eyt bvq 1935-09-09 22:55:44.018624000 1987-11-16 11:36:35.377907000 
18712890564348186896405693746837059.77 +-1310237550 -1217043936 grm ugb gbv 1970-12-18 08:58:23.697768000 1987-10-08 09:20:44.116985000 15933671421756299035516973180673242.789 +135115446 214939512 qcw cwi cni 1992-11-26 17:08:32.186751000 1939-02-06 16:19:36.256705000 -13917298707616940095496082715034935.248 +1472529008 1646988502 xse aug wrd 1947-11-17 05:06:27.197814000 1963-07-09 11:37:51.858310000 -9638780811182160300061266562997443.736 +1553139518 1646333132 sni hcw cwi 1946-08-10 08:59:40.178171000 1963-05-31 09:22:00.597388000 -12407615280832442731993662820971945.644 +-1296605854 -1216781788 dxs xje oje 1968-07-19 17:09:48.667154000 1987-11-03 10:51:18.290933000 -20990937477647027896545805752194610.466 +\N 215463808 tfa vhc sey 1996-08-12 23:38:09.232042000 1939-03-04 17:50:10.430653000 -16712419581212248101110989600157118.954 +41004314 214808438 oju dxj xje 1958-10-17 15:10:23.357360000 1939-01-24 15:34:19.169731000 1791585940872588588338173737770093.567 +121614824 1646595280 ytf kfa kfq 1944-03-14 23:22:07.239496000 1963-06-26 10:52:34.771336000 14475911676616657390246943771032941.962 +1566771214 -1216257492 gbv idx fau 1935-09-22 23:41:01.105598000 1987-11-29 12:21:52.464881000 18754429573051437185681759923070433.474 +-1390716986 -1216912862 kvq ykf kfa 1970-12-31 09:43:40.784742000 1987-10-21 10:06:01.203959000 15975210430459549324793039356906616.493 +-1310106476 215070586 ugb gbm grm 1992-12-09 17:53:49.273725000 1939-02-19 17:04:53.343679000 -13875759698913689806220016538801561.544 +\N 214677364 cwi eyk bvh 1947-11-30 05:51:44.284788000 1963-07-22 12:23:08.945284000 -9597241802478910010785200386764070.032 +135246520 1646464206 wrm lgb gbm 1946-08-23 09:44:57.265145000 1963-06-13 10:07:17.684362000 -12366076272129192442717596644738571.94 +1472660082 -1216650714 hcw cni sni 1968-08-01 17:55:05.754128000 1987-11-16 11:36:35.377907000 -20949398468943777607269739575961236.762 +1553270592 -1217043936 xje alg gbv 1996-08-26 00:23:26.319016000 1939-03-17 18:35:27.517627000 -16670880572508997811834923423923745.25 +-1296474780 214939512 sny hcn cni 1958-10-30 15:55:40.444334000 1939-02-06 16:19:36.256705000 1833124949575838877614239914003467.271 +41135388 1646726354 dxj oje oju 1944-03-28 00:07:24.326470000 1963-07-09 11:37:51.858310000 14517450685319907679523009947266315.666 +\N 1646333132 kfa mhc cwi 1935-10-06 00:26:18.192572000 1987-12-12 13:07:09.551855000 18795968581754687474957826099303807.178 +121745898 -1216781788 oau doj oje 1971-01-13 10:28:57.871716000 1987-11-03 10:51:18.290933000 16016749439162799614069105533139990.197 +1566902288 215201660 ykf kfq kvq 1992-12-22 18:39:06.360699000 1939-03-04 17:50:10.430653000 -13834220690210439516943950362568187.84 +-1390585912 214808438 gbm ido xje 1947-12-13 06:37:01.371762000 1938-12-29 14:03:44.995783000 -9555702793775659721509134210530696.328 +-1309975402 1646595280 bvq pkf kfq 1946-09-05 10:30:14.352119000 1963-06-26 10:52:34.771336000 -12324537263425942153441530468505198.236 +135377594 -1216519640 lgb grm wrm 1968-08-14 18:40:22.841102000 1987-11-29 12:21:52.464881000 -20907859460240527317993673399727863.058 +\N -1216912862 cni epk kfa 1996-09-08 01:08:43.405990000 1987-09-25 08:35:27.030011000 -16629341563805747522558857247690371.546 +1472791156 215070586 wrd lgr grm 1958-11-12 16:40:57.531308000 1939-02-19 17:04:53.343679000 1874663958279089166890306090236840.975 +1553401666 1646857428 hcn sni sny 1944-04-10 00:52:41.413444000 1963-07-22 12:23:08.945284000 14558989694023157968799076123499689.37 +-1296343706 1646464206 oje qlg 
gbm 1935-10-19 01:11:35.279546000 1963-05-18 08:36:43.510414000 18837507590457937764233892275537180.882 +41266462 -1216650714 sey hsn sni 1971-01-26 11:14:14.958690000 1987-11-16 11:36:35.377907000 16058288447866049903345171709373363.901 +121876972 215332734 doj oju oau 1993-01-04 19:24:23.447673000 1939-03-17 18:35:27.517627000 -13792681681507189227667884186334814.136 +\N 214939512 kfq mhs cni 1947-12-26 07:22:18.458736000 1939-01-11 14:49:02.082757000 -9514163785072409432233068034297322.624 +1567033362 1646726354 fau toj oju 1946-09-18 11:15:31.439093000 1963-07-09 11:37:51.858310000 -12282998254722691864165464292271824.532 +-1390454838 -1216388566 pkf kvq bvq 1968-08-27 19:25:39.928076000 1987-12-12 13:07:09.551855000 -20866320451537277028717607223494489.354 +-1309844328 -1216781788 grm \N oje 1996-09-21 01:54:00.492964000 1987-10-08 09:20:44.116985000 -16587802555102497233282791071456997.842 +135508668 215201660 bvh ito kvq 1958-11-25 17:26:14.618282000 1939-03-04 17:50:10.430653000 1916202966982339456166372266470214.679 +1472922230 1646988502 lgr pkv wrd 1944-04-23 01:37:58.500418000 1938-12-29 14:03:44.995783000 14600528702726408258075142299733063.074 +\N 1646595280 sni wrm kfq 1935-11-01 01:56:52.366520000 1963-05-31 09:22:00.597388000 18879046599161188053509958451770554.586 +1553532740 -1216519640 wid upk wrm 1971-02-08 11:59:32.045664000 1987-11-29 12:21:52.464881000 16099827456569300192621237885606737.605 +-1296212632 215463808 hsn lwr sey 1993-01-17 20:09:40.534647000 1987-09-25 08:35:27.030011000 -13751142672803938938391818010101440.432 +41397536 215070586 oju sny grm 1948-01-08 08:07:35.545710000 1939-01-24 15:34:19.169731000 -9472624776369159142957001858063948.92 +122008046 1646857428 jey qlw sny 1946-10-01 12:00:48.526067000 1963-07-22 12:23:08.945284000 \N +1567164436 -1216257492 toj xsn fau 1968-09-09 20:10:57.015050000 1963-05-18 08:36:43.510414000 -12241459246019441574889398116038450.828 +\N -1216650714 kvq oau sni 1996-10-04 02:39:17.579938000 1987-10-21 10:06:01.203959000 -20824781442834026739441541047261115.65 +-1390323764 215332734 fal mxs oau 1958-12-08 18:11:31.705256000 1939-03-17 18:35:27.517627000 -16546263546399246944006724895223624.138 +-1309713254 214677364 pkv toa bvh 1944-05-06 02:23:15.587392000 1939-01-11 14:49:02.082757000 1957741975685589745442438442703588.383 +135639742 1646726354 wrm bvq oju 1935-11-14 02:42:09.453494000 1963-06-13 10:07:17.684362000 14642067711429658547351208475966436.778 +1473053304 -1216388566 bmh yto bvq 1971-02-21 12:44:49.132638000 1987-12-12 13:07:09.551855000 18920585607864438342786024628003928.29 +1553663814 -1217043936 lwr pbv gbv 1993-01-30 20:54:57.621621000 1987-10-08 09:20:44.116985000 16141366465272550481897304061840111.309 +\N 215201660 sny wrd kvq 1948-01-21 08:52:52.632684000 1939-02-06 16:19:36.256705000 -13709603664100688649115751833868066.728 +-1296081558 1646988502 nid upb wrd 1946-10-14 12:46:05.613041000 1938-12-29 14:03:44.995783000 -9431085767665908853680935681830575.216 +41528610 1646333132 xsn cwr cwi 1968-09-22 20:56:14.102024000 1963-05-31 09:22:00.597388000 -12199920237316191285613331939805077.124 +122139120 -1216519640 oau sey wrm 1996-10-17 03:24:34.666912000 1987-11-03 10:51:18.290933000 -20783242434130776450165474871027741.946 +1567295510 215463808 jep qcw sey 1958-12-21 18:56:48.792230000 1987-09-25 08:35:27.030011000 -16504724537695996654730658718990250.434 +-1390192690 214808438 toa xse xje 1944-05-19 03:08:32.674366000 1939-01-24 15:34:19.169731000 1999280984388840034718504618936962.087 +\N 
1646857428 bvq fau sny 1935-11-27 03:27:26.540468000 1963-06-26 10:52:34.771336000 14683606720132908836627274652199810.482 +-1309582180 -1216257492 fql dxs fau 1971-03-06 13:30:06.219612000 1963-05-18 08:36:43.510414000 18962124616567688632062090804237301.994 +135770816 -1216912862 pbv tfa kfa 1993-02-12 21:40:14.708595000 1987-10-21 10:06:01.203959000 16182905473975800771173370238073485.013 +1473184378 215332734 wrd bvh oau 1948-02-03 09:38:09.719658000 1939-02-19 17:04:53.343679000 -13668064655397438359839685657634693.024 +1553794888 214677364 rmh ytf bvh 1946-10-27 13:31:22.700015000 1939-01-11 14:49:02.082757000 -9389546758962658564404869505597201.512 +-1295950484 1646464206 cwr gbv gbm 1968-10-05 21:41:31.188998000 1963-06-13 10:07:17.684362000 -12158381228612940996337265763571703.42 +\N -1216388566 sey wid bvq 1996-10-30 04:09:51.753886000 1987-11-16 11:36:35.377907000 -20741703425427526160889408694794368.242 +41659684 -1217043936 nit ugb gbv 1959-01-03 19:42:05.879204000 1987-10-08 09:20:44.116985000 -16463185528992746365454592542756876.73 +122270194 214939512 xse cwi cni 1944-06-01 03:53:49.761340000 1939-02-06 16:19:36.256705000 2040819993092090323994570795170335.791 +1567426584 1646988502 fau jey wrd 1935-12-10 04:12:43.627442000 1963-07-09 11:37:51.858310000 14725145728836159125903340828433184.186 +-1390061616 1646333132 jup hcw cwi 1971-03-19 14:15:23.306586000 1963-05-31 09:22:00.597388000 19003663625270938921338156980470675.698 +-1309451106 -1216781788 tfa xje oje 1993-02-25 22:25:31.795569000 1987-11-03 10:51:18.290933000 16224444482679051060449436414306858.717 +\N 215463808 bvh fal sey 1948-02-16 10:23:26.806632000 1939-03-04 17:50:10.430653000 -13626525646694188070563619481401319.32 +135901890 214808438 vql dxj xje 1946-11-09 14:16:39.786989000 1939-01-24 15:34:19.169731000 -9348007750259408275128803329363827.808 +1473315452 1646595280 gbv kfa kfq 1968-10-18 22:26:48.275971000 1963-06-26 10:52:34.771336000 -12116842219909690707061199587338329.716 +1553925962 -1216257492 wid bmh fau 1996-11-12 04:55:08.840860000 1987-11-29 12:21:52.464881000 -20700164416724275871613342518560994.538 +-1295819410 -1216912862 rmx ykf \N 1959-01-16 20:27:22.966178000 1987-10-21 10:06:01.203959000 -16421646520289496076178526366523503.026 +41790758 215070586 cwi gbm kfa 1944-06-14 04:39:06.848314000 \N 2082359001795340613270636971403709.495 +\N 214677364 jey nid grm 1935-12-23 04:58:00.714416000 1939-02-19 17:04:53.343679000 14766684737539409415179407004666557.89 +122401268 1646464206 nyt lgb bvh 1971-04-01 15:00:40.393560000 1963-07-22 12:23:08.945284000 19045202633974189210614223156704049.402 +1567557658 -1216650714 xje cni gbm 1993-03-10 23:10:48.882543000 \N 16265983491382301349725502590540232.421 +-1389930542 -1217043936 fal jep sni 1948-02-29 11:08:43.893605000 1963-06-13 10:07:17.684362000 -13584986637990937781287553305167945.616 +-1309320032 214939512 aup hcn gbv 1946-11-22 15:01:56.873962000 1987-11-16 11:36:35.377907000 -9306468741556157985852737153130454.104 +136032964 1646726354 kfa oje cni 1968-10-31 23:12:05.362945000 1939-03-17 18:35:27.517627000 -12075303211206440417785133411104956.012 +\N 1646333132 bmh fql oju 1996-11-25 05:40:25.927834000 1939-02-06 16:19:36.256705000 -20658625408021025582337276342327620.834 +1473446526 -1216781788 vqc doj cwi 1959-01-29 21:12:40.053152000 1963-07-09 11:37:51.858310000 -16380107511586245786902460190290129.322 +1554057036 215201660 gbm kfq oje 1944-06-27 05:24:23.935288000 1987-12-12 13:07:09.551855000 2123898010498590902546703147637083.199 
+-1295688336 214808438 nid upb kvq 1936-01-05 05:43:17.801390000 1987-11-03 10:51:18.290933000 14808223746242659704455473180899931.594 +41921832 1646595280 rdx pkf xje 1971-04-14 15:45:57.480534000 1939-03-04 17:50:10.430653000 19086741642677439499890289332937423.106 +122532342 -1216519640 cni grm kfq 1993-03-23 23:56:05.969517000 1938-12-29 14:03:44.995783000 16307522500085551639001568766773606.125 +\N -1216912862 jep qcw wrm 1948-03-13 11:54:00.980579000 1963-06-26 10:52:34.771336000 -13543447629287687492011487128934571.912 +1567688732 215070586 eyt lgr kfa 1946-12-05 15:47:13.960936000 1987-11-29 12:21:52.464881000 -9264929732852907696576670976897080.4 +-1389799468 1646857428 oje sni grm 1968-11-13 23:57:22.449919000 1987-09-25 08:35:27.030011000 -12033764202503190128509067234871582.308 +-1309188958 1646464206 fql dxs sny 1996-12-08 06:25:43.014808000 1939-02-19 17:04:53.343679000 -20617086399317775293061210166094247.13 +136164038 -1216650714 aug hsn gbm 1959-02-11 21:57:57.140126000 1963-07-22 12:23:08.945284000 -16338568502882995497626394014056755.618 +1473577600 215332734 kfq oju sni 1944-07-10 06:09:41.022262000 1963-05-18 08:36:43.510414000 2165437019201841191822769323870456.903 +\N 214939512 rmh ytf oau 1936-01-18 06:28:34.888364000 1987-11-16 11:36:35.377907000 14849762754945909993731539357133305.298 +1554188110 1646726354 vhc toj cni 1971-04-27 16:31:14.567508000 1939-03-17 18:35:27.517627000 19128280651380689789166355509170796.81 +-1295557262 -1216388566 grm kvq oju 1993-04-06 00:41:23.056491000 1939-01-11 14:49:02.082757000 16349061508788801928277634943006979.829 +42052906 -1216781788 nit ugb bvq 1948-03-26 12:39:18.067553000 1963-07-09 11:37:51.858310000 -13501908620584437202735420952701198.208 +122663416 215201660 idx pkv oje 1946-12-18 16:32:31.047910000 1987-12-12 13:07:09.551855000 -9223390724149657407300604800663706.696 +1567819806 1646988502 sni wrm kvq 1968-11-27 00:42:39.536893000 1987-10-08 09:20:44.116985000 -11992225193799939839233001058638208.604 +\N 1646595280 jup hcw wrd 1996-12-21 07:11:00.101782000 1939-03-04 17:50:10.430653000 -20575547390614525003785143989860873.426 +-1389668394 -1216519640 eyk lwr kfq 1959-02-24 22:43:14.227100000 1938-12-29 14:03:44.995783000 -16297029494179745208350327837823381.914 +-1309057884 215463808 oju sny wrm 1944-07-23 06:54:58.109236000 1963-05-31 09:22:00.597388000 2206976027905091481098835500103830.607 +136295112 215070586 vql dxj sey 1936-01-31 07:13:51.975338000 1987-11-29 12:21:52.464881000 14891301763649160283007605533366679.002 +1473708674 1646857428 alg xsn grm 1971-05-10 17:16:31.654482000 1987-09-25 08:35:27.030011000 19169819660083940078442421685404170.514 +1554319184 -1216257492 kvq oau sny 1993-04-19 01:26:40.143465000 1939-01-24 15:34:19.169731000 16390600517492052217553701119240353.533 +\N -1216650714 rmx ykf fau 1948-04-08 13:24:35.154527000 1963-07-22 12:23:08.945284000 -13460369611881186913459354776467824.504 +-1295426188 215332734 mhc toa sni 1946-12-31 17:17:48.134884000 1963-05-18 08:36:43.510414000 -9181851715446407118024538624430332.992 +42183980 214677364 wrm bvq oau 1968-12-10 01:27:56.623867000 1987-10-21 10:06:01.203959000 -11950686185096689549956934882404834.9 +122794490 1646726354 nyt lgb bvh 1997-01-03 07:56:17.188756000 1939-03-17 18:35:27.517627000 -20534008381911274714509077813627499.722 +1567950880 -1216388566 ido pbv oju 1959-03-09 23:28:31.314074000 1939-01-11 14:49:02.082757000 -16255490485476494919074261661590008.21 +-1389537320 -1217043936 sny wrd bvq 1944-08-05 07:40:15.196210000 1963-06-13 
10:07:17.684362000 2248515036608341770374901676337204.311 +\N 215201660 aup hcn gbv 1936-02-13 07:59:09.062312000 1987-12-12 13:07:09.551855000 14932840772352410572283671709600052.706 +-1308926810 1646988502 epk cwr kvq 1971-05-23 18:01:48.741456000 1987-10-08 09:20:44.116985000 19211358668787190367718487861637544.218 +136426186 1646333132 oau sey wrd 1993-05-02 02:11:57.230438000 1939-02-06 16:19:36.256705000 16432139526195302506829767295473727.237 +1473839748 -1216519640 vqc doj cwi 1948-04-21 14:09:52.241501000 1938-12-29 14:03:44.995783000 -13418830603177936624183288600234450.8 +1554450258 215463808 qlg xse wrm 1947-01-13 18:03:05.221858000 1963-05-31 09:22:00.597388000 -9140312706743156828748472448196959.288 +-1295295114 214808438 bvq fau sey 1968-12-23 02:13:13.710841000 1987-11-03 10:51:18.290933000 -11909147176393439260680868706171461.196 +\N 1646857428 rdx pkf xje 1997-01-16 08:41:34.275730000 1987-09-25 08:35:27.030011000 -20492469373208024425233011637394126.018 +42315054 -1216257492 mhs tfa sny 1959-03-23 00:13:48.401048000 1939-01-24 15:34:19.169731000 -16213951476773244629798195485356634.506 +122925564 -1216912862 wrd bvh fau 1944-08-18 08:25:32.283184000 1963-06-26 10:52:34.771336000 2290054045311592059650967852570578.015 +1568081954 215332734 eyt lgr kfa 1936-02-26 08:44:26.149286000 1963-05-18 08:36:43.510414000 14974379781055660861559737885833426.41 +-1389406246 214677364 ito gbv oau \N 1987-10-21 10:06:01.203959000 19252897677490440656994554037870917.922 +-1308795736 1646464206 sey wid bvh 1971-06-05 18:47:05.828429000 1939-02-19 17:04:53.343679000 16473678534898552796105833471707100.941 +\N -1216388566 aug hsn gbm 1993-05-15 02:57:14.317412000 1939-01-11 14:49:02.082757000 -13377291594474686334907222424001077.096 +136557260 -1217043936 upk cwi bvq 1948-05-04 14:55:09.328475000 1963-06-13 10:07:17.684362000 -9098773698039906539472406271963585.584 +1473970822 214939512 fau jey gbv 1947-01-26 18:48:22.308832000 1987-11-16 11:36:35.377907000 -11867608167690188971404802529938087.492 +1554581332 1646988502 vhc toj cni 1969-01-05 02:58:30.797815000 1987-10-08 09:20:44.116985000 -20450930364504774135956945461160752.314 +-1295164040 1646333132 qlw xje wrd 1997-01-29 09:26:51.362704000 1939-02-06 16:19:36.256705000 -16172412468069994340522129309123260.802 +42446128 -1216781788 bvh fal cwi 1959-04-05 00:59:05.488022000 1963-07-09 11:37:51.858310000 2331593054014842348927034028803951.719 +\N 215463808 idx pkv oje 1944-08-31 09:10:49.370158000 1963-05-31 09:22:00.597388000 15015918789758911150835804062066800.114 +123056638 214808438 mxs kfa sey 1936-03-10 09:29:43.236260000 1987-11-03 10:51:18.290933000 19294436686193690946270620214104291.626 +1568213028 1646595280 wid bmh xje 1971-06-18 19:32:22.915403000 1939-03-04 17:50:10.430653000 16515217543601803085381899647940474.645 +-1389275172 -1216257492 eyk lwr kfq 1993-05-28 03:42:31.404386000 1939-01-24 15:34:19.169731000 -13335752585771436045631156247767703.392 +-1308664662 -1216912862 yto gbm fau 1948-05-17 15:40:26.415449000 1963-06-26 10:52:34.771336000 -9057234689336656250196340095730211.88 +136688334 215070586 jey nid kfa 1947-02-08 19:33:39.395806000 1987-11-29 12:21:52.464881000 -11826069158986938682128736353704713.788 +\N 214677364 alg xsn grm 1969-01-18 03:43:47.884789000 1987-10-21 10:06:01.203959000 -20409391355801523846680879284927378.61 +1474101896 1646464206 upb cni bvh 1997-02-11 10:12:08.449678000 1939-02-19 17:04:53.343679000 -16130873459366744051246063132889887.098 +1554712406 -1216650714 fal jep gbm 1959-04-18 
01:44:22.574996000 1963-07-22 12:23:08.945284000 2373132062718092638203100205037325.423 +-1295032966 -1217043936 mhc toa sni 1944-09-13 09:56:06.457132000 1963-06-13 10:07:17.684362000 15057457798462161440111870238300173.818 +42577202 214939512 qcw oje gbv 1936-03-23 10:15:00.323234000 1987-11-16 11:36:35.377907000 19335975694896941235546686390337665.33 +123187712 1646726354 bmh fql cni 1971-07-01 20:17:40.002377000 1939-03-17 18:35:27.517627000 16556756552305053374657965824173848.349 +\N 1646333132 ido pbv oju 1993-06-10 04:27:48.491360000 1939-02-06 16:19:36.256705000 -13294213577068185756355090071534329.688 +1568344102 -1216781788 dxs kfq cwi 1948-05-30 16:25:43.502423000 1963-07-09 11:37:51.858310000 -9015695680633405960920273919496838.176 +-1389144098 215201660 nid rmh oje 1947-02-21 20:18:56.482780000 1987-12-12 13:07:09.551855000 -11784530150283688392852670177471340.084 +-1308533588 214808438 epk cwr kvq 1969-01-31 04:29:04.971763000 1987-11-03 10:51:18.290933000 -20367852347098273557404813108694004.906 +136819408 1646595280 ytf grm xje 1997-02-24 10:57:25.536652000 1939-03-04 17:50:10.430653000 -16089334450663493761969996956656513.394 +1474232970 -1216519640 jep nit kfq 1959-05-01 02:29:39.661970000 1938-12-29 14:03:44.995783000 2414671071421342927479166381270699.127 +\N -1216912862 qlg xse wrm 1944-09-26 10:41:23.544106000 1963-06-26 10:52:34.771336000 15098996807165411729387936414533547.522 +1554843480 215070586 \N sni kfa 1936-04-05 11:00:17.410208000 1987-11-29 12:21:52.464881000 19377514703600191524822752566571039.034 +-1294901892 1646857428 ugb jup grm 1971-07-14 21:02:57.089351000 1987-09-25 08:35:27.030011000 16598295561008303663934032000407222.053 +42708276 1646464206 fql tfa sny 1993-06-23 05:13:05.578334000 1939-02-19 17:04:53.343679000 -13252674568364935467079023895300955.984 +123318786 -1216650714 mhs oju gbm 1948-06-12 17:11:00.589397000 1963-07-22 12:23:08.945284000 -8974156671930155671644207743263464.472 +1568475176 215332734 hcw vql sni 1947-03-06 21:04:13.569754000 1963-05-18 08:36:43.510414000 -11742991141580438103576604001237966.38 +\N 214939512 rmh gbv oau 1969-02-13 05:14:22.058737000 1987-11-16 11:36:35.377907000 -20326313338395023268128746932460631.202 +-1389013024 1646726354 ito kvq cni 1997-03-09 11:42:42.623626000 1939-03-17 18:35:27.517627000 -16047795441960243472693930780423139.69 +-1308402514 -1216388566 dxj rmx oju 1959-05-14 03:14:56.748944000 1939-01-11 14:49:02.082757000 2456210080124593216755232557504072.831 +136950482 -1216781788 nit cwi bvq 1944-10-09 11:26:40.631080000 1963-07-09 11:37:51.858310000 15140535815868662018664002590766921.226 +1474364044 215201660 upk wrm oje 1936-04-18 11:45:34.497182000 1987-12-12 13:07:09.551855000 19419053712303441814098818742804412.738 +1554974554 1646988502 ykf nyt kvq 1971-07-27 21:48:14.176325000 1987-10-08 09:20:44.116985000 16639834569711553953210098176640595.757 +\N 1646595280 jup xje wrd 1993-07-06 05:58:22.665308000 1939-03-04 17:50:10.430653000 -13211135559661685177802957719067582.28 +-1294770818 -1216519640 qlw sny kfq 1948-06-25 17:56:17.676371000 1938-12-29 14:03:44.995783000 -8932617663226905382368141567030090.768 +42839350 215463808 lgb aup wrm 1947-03-19 21:49:30.656728000 1963-05-31 09:22:00.597388000 -11701452132877187814300537825004592.676 +123449860 215070586 vql kfa sey 1969-02-26 05:59:39.145711000 1987-11-29 12:21:52.464881000 -20284774329691772978852680756227257.498 +1568606250 1646857428 mxs oau grm 1997-03-22 12:27:59.710600000 1987-09-25 08:35:27.030011000 
-16006256433256993183417864604189765.986 +-1388881950 -1216257492 hcn vqc sny 1959-05-27 04:00:13.835918000 1939-01-24 15:34:19.169731000 \N +\N -1216650714 rmx gbm fau 1944-10-22 12:11:57.718054000 1963-07-22 12:23:08.945284000 2497749088827843506031298733737446.535 +-1308271440 215332734 yto bvq sni 1936-05-01 12:30:51.584155000 1963-05-18 08:36:43.510414000 15182074824571912307940068767000294.93 +137081556 214677364 doj rdx oau 1971-08-09 22:33:31.263299000 1987-10-21 10:06:01.203959000 19460592721006692103374884919037786.442 +1474495118 1646726354 nyt cni bvh 1993-07-19 06:43:39.752282000 1939-03-17 18:35:27.517627000 16681373578414804242486164352873969.461 +1555105628 -1216388566 upb wrd oju 1948-07-08 18:41:34.763345000 1939-01-11 14:49:02.082757000 -13169596550958434888526891542834208.576 +-1294639744 -1217043936 pkf eyt bvq 1947-04-01 22:34:47.743702000 1963-06-13 10:07:17.684362000 -8891078654523655093092075390796717.064 +\N 215201660 aup oje gbv 1969-03-11 06:44:56.232685000 1987-12-12 13:07:09.551855000 -11659913124173937525024471648771218.972 +42970424 1646988502 qcw sey kvq 1997-04-04 13:13:16.797574000 1987-10-08 09:20:44.116985000 -20243235320988522689576614579993883.794 +123580934 1646333132 lgr aug wrd 1959-06-09 04:45:30.922892000 1939-02-06 16:19:36.256705000 -15964717424553742894141798427956392.282 +1568737324 -1216519640 vqc kfq cwi 1944-11-04 12:57:14.805028000 1938-12-29 14:03:44.995783000 2539288097531093795307364909970820.239 +-1388750876 215463808 dxs fau wrm 1936-05-14 13:16:08.671129000 1963-05-31 09:22:00.597388000 15223613833275162597216134943233668.634 +-1308140366 214808438 hsn vhc sey 1971-08-22 23:18:48.350273000 1987-11-03 10:51:18.290933000 19502131729709942392650951095271160.146 +\N 1646857428 rdx grm xje 1993-08-01 07:28:56.839256000 1987-09-25 08:35:27.030011000 16722912587118054531762230529107343.165 +137212630 -1216257492 ytf bvh sny 1948-07-21 19:26:51.850319000 1939-01-24 15:34:19.169731000 -13128057542255184599250825366600834.872 +1474626192 -1216912862 toj idx fau 1947-04-14 23:20:04.830676000 1963-06-26 10:52:34.771336000 -8849539645820404803816009214563343.36 +1555236702 215332734 eyt sni kfa 1969-03-24 07:30:13.319659000 1963-05-18 08:36:43.510414000 -11618374115470687235748405472537845.268 +-1294508670 214677364 ugb wid oau 1997-04-17 13:58:33.884548000 1987-10-21 10:06:01.203959000 -20201696312285272400300548403760510.09 +43101498 1646464206 pkv eyk bvh 1959-06-22 05:30:48.009866000 1939-02-19 17:04:53.343679000 -15923178415850492604865732251723018.578 +\N -1216388566 aug oju gbm 1944-11-17 13:42:31.892002000 1939-01-11 14:49:02.082757000 2580827106234344084583431086204193.943 +123712008 -1217043936 hcw jey bvq 1936-05-27 14:01:25.758103000 1963-06-13 10:07:17.684362000 15265152841978412886492201119467042.338 +1568868398 214939512 lwr alg gbv 1971-09-05 00:04:05.437247000 1987-11-16 11:36:35.377907000 19543670738413192681927017271504533.85 +-1388619802 1646988502 vhc kvq cni 1993-08-14 08:14:13.926230000 1987-10-08 09:20:44.116985000 16764451595821304821038296705340716.869 +-1308009292 1646333132 dxj fal wrd 1948-08-03 20:12:08.937293000 1939-02-06 16:19:36.256705000 -13086518533551934309974759190367461.168 +137343704 -1216781788 xsn mhc cwi 1947-04-28 00:05:21.917650000 1963-07-09 11:37:51.858310000 -8808000637117154514539943038329969.656 +\N 215463808 idx wrm oje 1969-04-06 08:15:30.406633000 1963-05-31 09:22:00.597388000 -11576835106767436946472339296304471.564 +1474757266 214808438 ykf bmh sey 1997-04-30 14:43:50.971522000 1987-11-03 
10:51:18.290933000 -20160157303582022111024482227527136.386 +1555367776 1646595280 toa ido xje 1959-07-05 06:16:05.096840000 1939-03-04 17:50:10.430653000 -15881639407147242315589666075489644.874 +-1294377596 -1216257492 eyk sny kfq 1944-11-30 14:27:48.978976000 1939-01-24 15:34:19.169731000 2622366114937594373859497262437567.647 +43232572 -1216912862 lgb nid fau 1936-06-09 14:46:42.845077000 1963-06-26 10:52:34.771336000 15306691850681663175768267295700416.042 +123843082 215070586 pbv epk kfa 1971-09-18 00:49:22.524221000 1987-11-29 12:21:52.464881000 19585209747116442971203083447737907.554 +\N 214677364 alg oau grm 1993-08-27 08:59:31.013204000 1987-10-21 10:06:01.203959000 16805990604524555110314362881574090.573 +1568999472 1646464206 hcn jep bvh 1948-08-16 20:57:26.024267000 1939-02-19 17:04:53.343679000 -13044979524848684020698693014134087.464 +-1388488728 -1216650714 cwr qlg gbm 1947-05-11 00:50:39.004624000 1963-07-22 12:23:08.945284000 -8766461628413904225263876862096595.952 +-1307878218 -1217043936 mhc bvq sni 1969-04-19 09:00:47.493607000 1963-06-13 10:07:17.684362000 -11535296098064186657196273120071097.86 +137474778 214939512 doj fql gbv 1997-05-13 15:29:08.058495000 1987-11-16 11:36:35.377907000 -20118618294878771821748416051293762.682 +1474888340 1646726354 xse mhs cni 1959-07-18 07:01:22.183813000 1939-03-17 18:35:27.517627000 -15840100398443992026313599899256271.17 +\N 1646333132 ido wrd oju 1944-12-13 15:13:06.065949000 1939-02-06 16:19:36.256705000 2663905123640844663135563438670941.351 +1555498850 -1216781788 pkf rmh cwi 1936-06-22 15:31:59.932051000 1963-07-09 11:37:51.858310000 15348230859384913465044333471933789.746 +-1294246522 215201660 tfa ito oje 1971-10-01 01:34:39.611195000 1987-12-12 13:07:09.551855000 19626748755819693260479149623971281.258 +43363646 214808438 epk sey kvq 1993-09-09 09:44:48.100178000 1987-11-03 10:51:18.290933000 16847529613227805399590429057807464.277 +123974156 1646595280 lgr nit xje 1948-08-29 21:42:43.111241000 1939-03-04 17:50:10.430653000 -13003440516145433731422626837900713.76 +1569130546 -1216519640 gbv upk kfq 1947-05-24 01:35:56.091598000 1938-12-29 14:03:44.995783000 -8724922619710653935987810685863222.248 +\N -1216912862 qlg fau wrm 1969-05-02 09:46:04.580581000 1963-06-26 10:52:34.771336000 -11493757089360936367920206943837724.156 +-1388357654 215070586 hsn jup kfa 1997-05-26 16:14:25.145469000 1987-11-29 12:21:52.464881000 -20077079286175521532472349875060388.978 +-1307747144 1646857428 cwi qlw grm 1959-07-31 07:46:39.270787000 1987-09-25 08:35:27.030011000 -15798561389740741737037533723022897.466 +137605852 1646464206 mhs bvh sny 1944-12-26 15:58:23.152923000 1939-02-19 17:04:53.343679000 2705444132344094952411629614904315.055 +1475019414 -1216650714 toj vql gbm 1936-07-05 16:17:17.019025000 1963-07-22 12:23:08.945284000 15389769868088163754320399648167163.45 +1555629924 215332734 xje mxs sni 1971-10-14 02:19:56.698169000 1963-05-18 08:36:43.510414000 19668287764522943549755215800204654.962 +\N 214939512 ito wid oau 1993-09-22 10:30:05.187152000 1987-11-16 11:36:35.377907000 16889068621931055688866495234040837.981 +-1294115448 1646726354 pkv rmx cni 1948-09-11 22:28:00.198215000 1939-03-17 18:35:27.517627000 -12961901507442183442146560661667340.056 +43494720 -1216388566 kfa yto oju 1947-06-06 02:21:13.178572000 1939-01-11 14:49:02.082757000 -8683383611007403646711744509629848.544 +124105230 -1216781788 upk jey bvq 1969-05-15 10:31:21.667555000 1963-07-09 11:37:51.858310000 -11452218080657686078644140767604350.452 +1569261620 
215201660 lwr nyt oje 1997-06-08 16:59:42.232443000 1987-12-12 13:07:09.551855000 -20035540277472271243196283698827015.274 +-1388226580 1646988502 gbm upb kvq 1959-08-13 08:31:56.357761000 1987-10-08 09:20:44.116985000 -15757022381037491447761467546789523.762 +\N 1646595280 qlw fal wrd 1945-01-08 16:43:40.239897000 1939-03-04 17:50:10.430653000 2746983141047345241687695791137688.759 +-1307616070 -1216519640 xsn aup kfq 1936-07-18 17:02:34.105999000 1938-12-29 14:03:44.995783000 15431308876791414043596465824400537.154 +137736926 215463808 cni qcw wrm 1971-10-27 03:05:13.785143000 1963-05-31 09:22:00.597388000 19709826773226193839031281976438028.666 +1475150488 215070586 mxs bmh sey 1993-10-05 11:15:22.274126000 1987-11-29 12:21:52.464881000 16930607630634305978142561410274211.685 +1555760998 1646857428 toa vqc grm 1948-09-24 23:13:17.285189000 1987-09-25 08:35:27.030011000 -12920362498738933152870494485433966.352 +-1293984374 -1216257492 oje dxs sny 1947-06-19 03:06:30.265546000 1939-01-24 15:34:19.169731000 -8641844602304153357435678333396474.84 +\N -1216650714 yto nid fau 1969-05-28 11:16:38.754529000 1963-07-22 12:23:08.945284000 -11410679071954435789368074591370976.748 +43625794 215332734 pbv rdx sni 1997-06-21 17:44:59.319417000 1963-05-18 08:36:43.510414000 -19994001268769020953920217522593641.57 +124236304 214677364 kfq ytf oau 1959-08-26 09:17:13.444735000 1987-10-21 10:06:01.203959000 -15715483372334241158485401370556150.058 +1569392694 1646726354 upb jep bvh 1945-01-21 17:28:57.326871000 1939-03-17 18:35:27.517627000 2788522149750595530963761967371062.463 +-1388095506 -1216388566 cwr eyt oju 1936-07-31 17:47:51.192973000 1939-01-11 14:49:02.082757000 15472847885494664332872532000633910.858 +-1307484996 -1217043936 grm ugb bvq 1971-11-09 03:50:30.872117000 1963-06-13 10:07:17.684362000 19751365781929444128307348152671402.37 +\N 215201660 qcw fql gbv 1993-10-18 12:00:39.361100000 1987-12-12 13:07:09.551855000 16972146639337556267418627586507585.389 +137868000 1646988502 xse aug kvq 1948-10-07 23:58:34.372163000 1987-10-08 09:20:44.116985000 -12878823490035682863594428309200592.648 +1475281562 1646333132 sni hcw wrd 1947-07-02 03:51:47.352520000 1939-02-06 16:19:36.256705000 -8600305593600903068159612157163101.136 +1555892072 -1216519640 dxs rmh cwi 1969-06-10 12:01:55.841503000 1938-12-29 14:03:44.995783000 -11369140063251185500092008415137603.044 +-1293853300 215463808 tfa vhc wrm 1997-07-04 18:30:16.406391000 1963-05-31 09:22:00.597388000 -19952462260065770664644151346360267.866 +43756868 214808438 oju dxj sey 1959-09-08 10:02:30.531709000 1987-11-03 10:51:18.290933000 -15673944363630990869209335194322776.354 +124367378 1646857428 ytf nit xje 1945-02-03 18:14:14.413845000 1987-09-25 08:35:27.030011000 2830061158453845820239828143604436.167 +1569523768 -1216257492 gbv idx sny 1936-08-13 18:33:08.279947000 1939-01-24 15:34:19.169731000 15514386894197914622148598176867284.562 +-1387964432 -1216912862 kvq ykf fau 1971-11-22 04:35:47.959091000 1963-06-26 10:52:34.771336000 19792904790632694417583414328904776.074 +-1307353922 215332734 ugb jup kfa 1993-10-31 12:45:56.448074000 1963-05-18 08:36:43.510414000 17013685648040806556694693762740959.093 +137999074 214677364 cwi eyk oau 1948-10-21 00:43:51.459137000 1987-10-21 10:06:01.203959000 -12837284481332432574318362132967218.944 +1475412636 1646464206 wrm lgb bvh 1947-07-15 04:37:04.439494000 1939-02-19 17:04:53.343679000 -8558766584897652778883545980929727.432 +1556023146 -1216388566 hcw vql gbm 1969-06-23 12:47:12.928477000 1939-01-11 
14:49:02.082757000 -11327601054547935210815942238904229.34 +-1293722226 -1217043936 xje alg bvq 1997-07-17 19:15:33.493365000 1963-06-13 10:07:17.684362000 -19910923251362520375368085170126894.162 +43887942 214939512 sny hcn gbv 1959-09-21 10:47:47.618683000 1987-11-16 11:36:35.377907000 -15632405354927740579933269018089402.65 +124498452 1646988502 dxj rmx cni 1945-02-16 18:59:31.500819000 1987-10-08 09:20:44.116985000 2871600167157096109515894319837809.871 +1569654842 1646333132 kfa mhc wrd 1936-08-26 19:18:25.366921000 1939-02-06 16:19:36.256705000 15555925902901164911424664353100658.266 +-1387833358 -1216781788 oau doj cwi 1971-12-05 05:21:05.046065000 1963-07-09 11:37:51.858310000 19834443799335944706859480505138149.778 +-1307222848 215463808 ykf nyt oje 1993-11-13 13:31:13.535048000 1963-05-31 09:22:00.597388000 17055224656744056845970759938974332.797 +138130148 214808438 gbm ido sey 1948-11-03 01:29:08.546111000 1987-11-03 10:51:18.290933000 \N +1475543710 1646595280 bvq pkf xje 1947-07-28 05:22:21.526468000 1939-03-04 17:50:10.430653000 -12795745472629182285042295956733845.24 +1556154220 -1216257492 lgb aup kfq 1969-07-06 13:32:30.015451000 1939-01-24 15:34:19.169731000 -8517227576194402489607479804696353.728 +-1293591152 -1216912862 cni epk fau 1997-07-30 20:00:50.580339000 1963-06-26 10:52:34.771336000 -11286062045844684921539876062670855.636 +44019016 215070586 wrd lgr kfa 1959-10-04 11:33:04.705657000 1987-11-29 12:21:52.464881000 -19869384242659270086092018993893520.458 +124629526 214677364 hcn vqc grm 1945-03-01 19:44:48.587793000 1987-10-21 10:06:01.203959000 -15590866346224490290657202841856028.946 +1569785916 1646464206 oje qlg bvh 1936-09-08 20:03:42.453895000 1939-02-19 17:04:53.343679000 2913139175860346398791960496071183.575 +-1387702284 -1216650714 sey hsn gbm 1971-12-18 06:06:22.133039000 1963-07-22 12:23:08.945284000 15597464911604415200700730529334031.97 +-1307091774 -1217043936 doj rdx sni 1993-11-26 14:16:30.622022000 1963-06-13 10:07:17.684362000 19875982808039194996135546681371523.482 +138261222 214939512 kfq mhs gbv 1948-11-16 02:14:25.633085000 1987-11-16 11:36:35.377907000 17096763665447307135246826115207706.501 +1475674784 1646726354 fau toj cni 1947-08-10 06:07:38.613442000 1939-03-17 18:35:27.517627000 -12754206463925931995766229780500471.536 +1556285294 1646333132 pkf eyt oju 1969-07-19 14:17:47.102425000 1939-02-06 16:19:36.256705000 -8475688567491152200331413628462980.024 +-1293460078 \N grm ito cwi 1997-08-12 20:46:07.667313000 1963-07-09 11:37:51.858310000 -11244523037141434632263809886437481.932 +44150090 -1216781788 bvh pkv oje 1959-10-17 12:18:21.792631000 1987-12-12 13:07:09.551855000 -19827845233956019796815952817660146.754 +124760600 215201660 lgr aug kvq 1945-03-14 20:30:05.674767000 1987-11-03 10:51:18.290933000 -15549327337521240001381136665622655.242 +1569916990 214808438 sni upk xje 1936-09-21 20:48:59.540869000 \N 2954678184563596688068026672304557.279 +-1387571210 1646595280 wid lwr kfq 1971-12-31 06:51:39.220013000 1939-03-04 17:50:10.430653000 15639003920307665489976796705567405.674 +-1306960700 -1216519640 hsn vhc wrm 1993-12-09 15:01:47.708996000 1938-12-29 14:03:44.995783000 19917521816742445285411612857604897.186 +138392296 -1216912862 oju qlw kfa 1948-11-29 02:59:42.720059000 1963-06-26 10:52:34.771336000 17138302674150557424522892291441080.205 +1475805858 215070586 jey xsn grm 1947-08-23 06:52:55.700416000 1987-11-29 12:21:52.464881000 -12712667455222681706490163604267097.832 +1556416368 1646857428 toj idx sny 1969-08-01 
15:03:04.189398000 1987-09-25 08:35:27.030011000 -8434149558787901911055347452229606.32 +-1293329004 1646464206 kvq mxs gbm 1997-08-25 21:31:24.754287000 1939-02-19 17:04:53.343679000 -11202984028438184342987743710204108.228 +44281164 -1216650714 fal toa sni 1959-10-30 13:03:38.879605000 1963-07-22 12:23:08.945284000 -19786306225252769507539886641426773.05 +124891674 215332734 pkv eyk oau 1945-03-27 21:15:22.761741000 1963-05-18 08:36:43.510414000 -15507788328817989712105070489389281.538 +1570048064 214939512 wrm yto cni 1936-10-04 21:34:16.627843000 1987-11-16 11:36:35.377907000 2996217193266846977344092848537930.983 +-1387440136 1646726354 bmh pbv oju 1972-01-13 07:36:56.306987000 1939-03-17 18:35:27.517627000 15680542929010915779252862881800779.378 +-1306829626 -1216388566 lwr alg bvq 1993-12-22 15:47:04.795970000 1939-01-11 14:49:02.082757000 19959060825445695574687679033838270.89 +138523370 -1216781788 sny upb oje 1948-12-12 03:44:59.807033000 1963-07-09 11:37:51.858310000 17179841682853807713798958467674453.909 +1475936932 215201660 nid cwr kvq 1947-09-05 07:38:12.787390000 1987-12-12 13:07:09.551855000 -12671128446519431417214097428033724.128 +1556547442 1646988502 xsn mhc wrd 1969-08-14 15:48:21.276372000 1987-10-08 09:20:44.116985000 -8392610550084651621779281275996232.616 +-1293197930 1646595280 oau qcw kfq 1997-09-07 22:16:41.841261000 1939-03-04 17:50:10.430653000 -11161445019734934053711677533970734.524 +44412238 -1216519640 jep xse wrm 1959-11-12 13:48:55.966579000 1938-12-29 14:03:44.995783000 -19744767216549519218263820465193399.346 +125022748 215463808 toa ido sey 1945-04-09 22:00:39.848715000 1963-05-31 09:22:00.597388000 -15466249320114739422829004313155907.834 +1570179138 215070586 bvq dxs grm 1936-10-17 22:19:33.714817000 1987-11-29 12:21:52.464881000 3037756201970097266620159024771304.687 +-1387309062 1646857428 fql tfa sny 1972-01-26 08:22:13.393961000 1987-09-25 08:35:27.030011000 15722081937714166068528929058034153.082 +-1306698552 -1216257492 pbv epk fau 1994-01-04 16:32:21.882944000 1939-01-24 15:34:19.169731000 20000599834148945863963745210071644.594 +138654444 -1216650714 wrd ytf sni 1948-12-25 04:30:16.894006000 1963-07-22 12:23:08.945284000 17221380691557058003075024643907827.613 +1476068006 215332734 rmh gbv oau 1947-09-18 08:23:29.874363000 1963-05-18 08:36:43.510414000 -12629589437816181127938031251800350.424 +1556678516 214677364 cwr qlg bvh 1969-08-27 16:33:38.363346000 1987-10-21 10:06:01.203959000 -8351071541381401332503215099762858.912 +-1293066856 1646726354 sey ugb oju 1997-09-20 23:01:58.928235000 1939-03-17 18:35:27.517627000 -11119906011031683764435611357737360.82 +44543312 -1216388566 nit cwi bvq 1959-11-25 14:34:13.053553000 1939-01-11 14:49:02.082757000 -19703228207846268928987754288960025.642 +125153822 -1217043936 xse mhs gbv 1945-04-22 22:45:56.935689000 1963-06-13 10:07:17.684362000 -15424710311411489133552938136922534.13 +1570310212 215201660 fau hcw kvq 1936-10-30 23:04:50.801791000 1987-12-12 13:07:09.551855000 3079295210673347555896225201004678.391 +-1387177988 1646988502 jup xje wrd 1972-02-08 09:07:30.480935000 1987-10-08 09:20:44.116985000 15763620946417416357804995234267526.786 +-1306567478 1646333132 tfa ito cwi 1994-01-17 17:17:38.969918000 1939-02-06 16:19:36.256705000 20042138842852196153239811386305018.298 +138785518 -1216519640 bvh dxj wrm 1949-01-07 05:15:33.980980000 1938-12-29 14:03:44.995783000 17262919700260308292351090820141201.317 +1476199080 215463808 vql kfa sey 1947-10-01 09:08:46.961337000 1963-05-31 
09:22:00.597388000 -12588050429112930838661965075566976.72 +1556809590 214808438 gbv upk xje 1969-09-09 17:18:55.450320000 1987-11-03 10:51:18.290933000 -8309532532678151043227148923529485.208 +-1292935782 1646857428 wid ykf sny 1997-10-03 23:47:16.015209000 1987-09-25 08:35:27.030011000 -11078367002328433475159545181503987.116 +44674386 -1216257492 rmx gbm fau 1959-12-08 15:19:30.140527000 1939-01-24 15:34:19.169731000 -19661689199143018639711688112726651.938 +125284896 -1216912862 cwi qlw kfa 1945-05-05 23:31:14.022663000 1963-06-26 10:52:34.771336000 -15383171302708238844276871960689160.426 +1570441286 215332734 jey lgb oau 1936-11-12 23:50:07.888765000 1963-05-18 08:36:43.510414000 3120834219376597845172291377238052.095 +-1387046914 214677364 nyt cni bvh 1972-02-21 09:52:47.567909000 1987-10-21 10:06:01.203959000 15805159955120666647081061410500900.49 +-1306436404 1646464206 xje mxs gbm 1994-01-30 18:02:56.056892000 1939-02-19 17:04:53.343679000 20083677851555446442515877562538392.002 +138916592 -1216388566 fal hcn bvq 1949-01-20 06:00:51.067954000 1939-01-11 14:49:02.082757000 17304458708963558581627156996374575.021 +1476330154 -1217043936 aup oje gbv 1947-10-14 09:54:04.048311000 1963-06-13 10:07:17.684362000 -12546511420409680549385898899333603.016 +1556940664 214939512 kfa yto cni 1969-09-22 18:04:12.537294000 1987-11-16 11:36:35.377907000 -8267993523974900753951082747296111.504 +-1292804708 1646988502 bmh doj wrd 1997-10-17 00:32:33.102183000 1987-10-08 09:20:44.116985000 -11036827993625183185883479005270613.412 +44805460 1646333132 vqc kfq cwi 1959-12-21 16:04:47.227501000 1939-02-06 16:19:36.256705000 -19620150190439768350435621936493278.234 +125415970 -1216781788 gbm upb oje 1945-05-19 00:16:31.109637000 1963-07-09 11:37:51.858310000 -15341632294004988555000805784455786.722 +1570572360 215463808 nid pkf sey 1936-11-26 00:35:24.975739000 1963-05-31 09:22:00.597388000 3162373228079848134448357553471425.799 +-1386915840 214808438 rdx grm xje 1972-03-05 10:38:04.654883000 1987-11-03 10:51:18.290933000 15846698963823916936357127586734274.194 +-1306305330 1646595280 cni qcw kfq 1994-02-12 18:48:13.143865000 1939-03-04 17:50:10.430653000 20125216860258696731791943738771765.706 +139047666 -1216257492 jep lgr fau 1949-02-02 06:46:08.154928000 1939-01-24 15:34:19.169731000 17345997717666808870903223172607948.725 +1476461228 -1216912862 eyt sni kfa 1947-10-27 10:39:21.135285000 1963-06-26 10:52:34.771336000 -12504972411706430260109832723100229.312 +1557071738 215070586 oje dxs grm 1969-10-05 18:49:29.624268000 1987-11-29 12:21:52.464881000 -8226454515271650464675016571062737.8 +-1292673634 214677364 fql hsn bvh 1997-10-30 01:17:50.189157000 1987-10-21 10:06:01.203959000 -10995288984921932896607412829037239.708 +44936534 1646464206 aug oju gbm 1960-01-03 16:50:04.314475000 1939-02-19 17:04:53.343679000 -19578611181736518061159555760259904.53 +125547044 -1216650714 kfq ytf sni 1945-06-01 01:01:48.196611000 1963-07-22 12:23:08.945284000 -15300093285301738265724739608222413.018 +1570703434 -1217043936 rmh toj gbv 1936-12-09 01:20:42.062713000 1963-06-13 10:07:17.684362000 3203912236783098423724423729704799.503 +\N 214939512 vhc kvq cni 1972-03-18 11:23:21.741857000 1987-11-16 11:36:35.377907000 15888237972527167225633193762967647.898 +-1386784766 1646726354 grm ugb oju 1994-02-25 19:33:30.230839000 1939-03-17 18:35:27.517627000 20166755868961947021068009915005139.41 +-1306174256 1646333132 nit pkv cwi 1949-02-15 07:31:25.241902000 1939-02-06 16:19:36.256705000 
17387536726370059160179289348841322.429 +139178740 -1216781788 idx wrm oje 1947-11-09 11:24:38.222259000 1963-07-09 11:37:51.858310000 -12463433403003179970833766546866855.608 +1476592302 215201660 sni hcw kvq 1969-10-18 19:34:46.711242000 1987-12-12 13:07:09.551855000 -8184915506568400175398950394829364.096 +1557202812 214808438 jup lwr xje 1997-11-12 02:03:07.276131000 1987-11-03 10:51:18.290933000 -10953749976218682607331346652803866.004 +-1292542560 1646595280 eyk sny kfq 1960-01-16 17:35:21.401449000 1939-03-04 17:50:10.430653000 -19537072173033267771883489584026530.826 +45067608 -1216519640 oju dxj wrm 1945-06-14 01:47:05.283585000 1938-12-29 14:03:44.995783000 -15258554276598487976448673431989039.314 +125678118 -1216912862 vql xsn kfa 1936-12-22 02:05:59.149687000 1963-06-26 10:52:34.771336000 3245451245486348713000489905938173.207 +1570834508 215070586 alg oau grm 1972-03-31 12:08:38.828830000 \N 15929776981230417514909259939201021.602 +-1386653692 1646857428 kvq ykf sny 1994-03-10 20:18:47.317813000 1987-11-29 12:21:52.464881000 20208294877665197310344076091238513.114 +-1306043182 1646464206 rmx toa gbm 1949-02-28 08:16:42.328876000 1987-09-25 08:35:27.030011000 17429075735073309449455355525074696.133 +139309814 -1216650714 mhc bvq sni 1947-11-22 12:09:55.309233000 1939-02-19 17:04:53.343679000 -12421894394299929681557700370633481.904 +1476723376 215332734 wrm lgb oau 1969-10-31 20:20:03.798216000 1963-07-22 12:23:08.945284000 -8143376497865149886122884218595990.392 +1557333886 214939512 nyt pbv cni 1997-11-25 02:48:24.363105000 1963-05-18 08:36:43.510414000 -10912210967515432318055280476570492.3 +-1292411486 1646726354 ido wrd oju 1960-01-29 18:20:38.488423000 1987-11-16 11:36:35.377907000 -19495533164330017482607423407793157.122 +45198682 -1216388566 sny hcn bvq 1945-06-27 02:32:22.370559000 1939-03-17 18:35:27.517627000 -15217015267895237687172607255755665.61 +125809192 -1216781788 aup cwr oje 1937-01-04 02:51:16.236661000 1939-01-11 14:49:02.082757000 3286990254189599002276556082171546.911 +1570965582 215201660 epk sey kvq 1972-04-13 12:53:55.915804000 1963-07-09 11:37:51.858310000 15971315989933667804185326115434395.306 +-1386522618 1646988502 oau doj wrd 1994-03-23 21:04:04.404787000 1987-12-12 13:07:09.551855000 20249833886368447599620142267471886.818 +-1305912108 1646595280 vqc xse kfq 1949-03-13 09:01:59.415850000 1987-10-08 09:20:44.116985000 17470614743776559738731421701308069.837 +139440888 -1216519640 qlg fau wrm 1947-12-05 12:55:12.396207000 1939-03-04 17:50:10.430653000 -12380355385596679392281634194400108.2 +1476854450 215463808 bvq \N sey 1969-11-13 21:05:20.885190000 1938-12-29 14:03:44.995783000 -8101837489161899596846818042362616.688 +1557464960 215070586 rdx pkf grm 1997-12-08 03:33:41.450079000 1963-05-31 09:22:00.597388000 -10870671958812182028779214300337118.596 +-1292280412 1646857428 mhs tfa sny 1960-02-11 19:05:55.575397000 1987-11-29 12:21:52.464881000 -19453994155626767193331357231559783.418 +45329756 -1216257492 wrd bvh fau 1945-07-10 03:17:39.457533000 1987-09-25 08:35:27.030011000 -15175476259191987397896541079522291.906 +125940266 -1216650714 eyt lgr sni 1937-01-17 03:36:33.323635000 1939-01-24 15:34:19.169731000 3328529262892849291552622258404920.615 +1571096656 215332734 ito gbv oau 1972-04-26 13:39:13.002778000 1963-07-22 12:23:08.945284000 16012854998636918093461392291667769.01 +-1386391544 214677364 sey wid bvh 1994-04-05 21:49:21.491761000 1963-05-18 08:36:43.510414000 20291372895071697888896208443705260.522 +-1305781034 1646726354 aug hsn 
oju 1949-03-26 09:47:16.502824000 1987-10-21 10:06:01.203959000 17512153752479810028007487877541443.541 +139571962 -1216388566 upk cwi bvq 1947-12-18 13:40:29.483181000 1939-03-17 18:35:27.517627000 -12338816376893429103005568018166734.496 +1476985524 -1217043936 fau jey gbv 1969-11-26 21:50:37.972164000 1939-01-11 14:49:02.082757000 -8060298480458649307570751866129242.984 +1557596034 215201660 vhc toj kvq 1997-12-21 04:18:58.537053000 1963-06-13 10:07:17.684362000 -10829132950108931739503148124103744.892 +-1292149338 1646988502 qlw xje wrd 1960-02-24 19:51:12.662371000 1987-12-12 13:07:09.551855000 -19412455146923516904055291055326409.714 +45460830 1646333132 bvh fal cwi 1945-07-23 04:02:56.544507000 1987-10-08 09:20:44.116985000 -15133937250488737108620474903288918.202 +126071340 -1216519640 idx pkv wrm 1937-01-30 04:21:50.410609000 1939-02-06 16:19:36.256705000 3370068271596099580828688434638294.319 +1571227730 215463808 mxs kfa sey 1972-05-09 14:24:30.089752000 1938-12-29 14:03:44.995783000 16054394007340168382737458467901142.714 +-1386260470 214808438 wid bmh xje 1994-04-18 22:34:38.578735000 1963-05-31 09:22:00.597388000 20332911903774948178172274619938634.226 +-1305649960 1646857428 eyk lwr sny 1949-04-08 10:32:33.589798000 1987-11-03 10:51:18.290933000 17553692761183060317283554053774817.245 +139703036 -1216257492 yto gbm fau 1947-12-31 14:25:46.570155000 1987-09-25 08:35:27.030011000 -12297277368190178813729501841933360.792 +1477116598 -1216912862 jey nid kfa 1969-12-09 22:35:55.059138000 1939-01-24 15:34:19.169731000 -8018759471755399018294685689895869.28 +1557727108 215332734 alg xsn oau 1998-01-03 05:04:15.624027000 1963-06-26 10:52:34.771336000 -10787593941405681450227081947870371.188 +-1292018264 214677364 upb cni bvh 1960-03-08 20:36:29.749345000 1963-05-18 08:36:43.510414000 -19370916138220266614779224879093036.01 +45591904 1646464206 fal jep gbm 1945-08-05 04:48:13.631481000 1987-10-21 10:06:01.203959000 -15092398241785486819344408727055544.498 +126202414 -1216388566 \N toa bvq 1937-02-12 05:07:07.497583000 1939-02-19 17:04:53.343679000 3411607280299349870104754610871668.023 +1571358804 -1217043936 mhc oje gbv 1972-05-22 15:09:47.176726000 1939-01-11 14:49:02.082757000 16095933016043418672013524644134516.418 +-1386129396 214939512 qcw fql cni 1994-05-01 23:19:55.665709000 1963-06-13 10:07:17.684362000 20374450912478198467448340796172007.93 +-1305518886 1646988502 bmh pbv wrd 1949-04-21 11:17:50.676772000 1987-11-16 11:36:35.377907000 17595231769886310606559620230008190.949 +139834110 1646333132 ido kfq cwi 1948-01-13 15:11:03.657129000 1987-10-08 09:20:44.116985000 -12255738359486928524453435665699987.088 +1477247672 -1216781788 dxs rmh oje 1969-12-22 23:21:12.146112000 1939-02-06 16:19:36.256705000 -7977220463052148729018619513662495.576 +1557858182 215463808 nid cwr sey 1998-01-16 05:49:32.711001000 1963-07-09 11:37:51.858310000 -10746054932702431160951015771636997.484 +-1291887190 214808438 epk grm xje 1960-03-21 21:21:46.836319000 1963-05-31 09:22:00.597388000 -19329377129517016325503158702859662.306 +45722978 1646595280 ytf nit kfq 1945-08-18 05:33:30.718455000 1987-11-03 10:51:18.290933000 -15050859233082236530068342550822170.794 +126333488 -1216257492 jep xse fau 1937-02-25 05:52:24.584556000 1939-03-04 17:50:10.430653000 3453146289002600159380820787105041.727 +1571489878 -1216912862 qlg sni kfa 1972-06-04 15:55:04.263700000 1939-01-24 15:34:19.169731000 16137472024746668961289590820367890.122 +-1385998322 215070586 ugb jup grm 1994-05-15 00:05:12.752683000 
1963-06-26 10:52:34.771336000 20415989921181448756724406972405381.634 +-1305387812 214677364 fql tfa bvh 1949-05-04 12:03:07.763746000 1987-11-29 12:21:52.464881000 17636770778589560895835686406241564.653 +139965184 1646464206 mhs oju gbm 1948-01-26 15:56:20.744103000 1987-10-21 10:06:01.203959000 -12214199350783678235177369489466613.384 +1477378746 -1216650714 hcw vql sni 1933-06-24 00:08:04.626239000 1939-02-19 17:04:53.343679000 -7935681454348898439742553337429121.872 +1557989256 -1217043936 rmh gbv gbv 1998-01-29 06:34:49.797975000 1963-07-22 12:23:08.945284000 -10704515923999180871674949595403623.78 +-1291756116 214939512 ito kvq cni 1960-04-03 22:07:03.923293000 1963-06-13 10:07:17.684362000 -19287838120813766036227092526626288.602 +45854052 1646726354 dxj rmx oju 1945-08-31 06:18:47.805429000 1987-11-16 11:36:35.377907000 -15009320224378986240792276374588797.09 +126464562 1646333132 nit cwi cwi 1937-03-10 06:37:41.671530000 1939-03-17 18:35:27.517627000 3494685297705850448656886963338415.431 +1571620952 -1216781788 upk wrm oje 1972-06-17 16:40:21.350674000 1939-02-06 16:19:36.256705000 16179011033449919250565656996601263.826 +-1385867248 215201660 ykf nyt kvq 1994-05-28 00:50:29.839657000 1963-07-09 11:37:51.858310000 20457528929884699046000473148638755.338 +-1305256738 214808438 jup xje xje 1949-05-17 12:48:24.850720000 1987-12-12 13:07:09.551855000 17678309787292811185111752582474938.357 +140096258 1646595280 qlw sny kfq 1948-02-08 16:41:37.831077000 1987-11-03 10:51:18.290933000 -12172660342080427945901303313233239.68 +1477509820 -1216519640 lgb aup wrm 1933-07-07 00:53:21.713213000 1939-03-04 17:50:10.430653000 -7894142445645648150466487161195748.168 +1558120330 -1216912862 vql kfa kfa 1998-02-11 07:20:06.884949000 1938-12-29 14:03:44.995783000 -10662976915295930582398883419170250.076 +-1291625042 215070586 mxs oau grm 1960-04-16 22:52:21.010267000 1963-06-26 10:52:34.771336000 -19246299112110515746951026350392914.898 +45985126 1646857428 hcn vqc sny 1945-09-13 07:04:04.892403000 1987-11-29 12:21:52.464881000 -14967781215675735951516210198355423.386 +126595636 1646464206 rmx gbm gbm 1937-03-23 07:22:58.758504000 1987-09-25 08:35:27.030011000 3536224306409100737932953139571789.135 +1571752026 -1216650714 yto bvq sni 1972-06-30 17:25:38.437648000 1939-02-19 17:04:53.343679000 16220550042153169539841723172834637.53 +-1385736174 215332734 doj rdx oau 1994-06-10 01:35:46.926631000 1963-07-22 12:23:08.945284000 20499067938587949335276539324872129.042 +-1305125664 214939512 nyt cni cni 1949-05-30 13:33:41.937694000 1963-05-18 08:36:43.510414000 17719848795996061474387818758708312.061 +140227332 1646726354 upb wrd oju 1948-02-21 17:26:54.918051000 1987-11-16 11:36:35.377907000 -12131121333377177656625237136999865.976 +1477640894 -1216388566 pkf eyt bvq 1933-07-20 01:38:38.800187000 1939-03-17 18:35:27.517627000 -7852603436942397861190420984962374.464 +1558251404 -1216781788 aup oje oje 1998-02-24 08:05:23.971923000 1939-01-11 14:49:02.082757000 -10621437906592680293122817242936876.372 +-1291493968 215201660 qcw sey kvq 1960-04-29 23:37:38.097241000 1963-07-09 11:37:51.858310000 -19204760103407265457674960174159541.194 +46116200 1646988502 lgr aug wrd 1945-09-26 07:49:21.979376000 1987-12-12 13:07:09.551855000 -14926242206972485662240144022122049.682 +126726710 1646595280 vqc kfq kfq 1937-04-05 08:08:15.845478000 1987-10-08 09:20:44.116985000 3577763315112351027209019315805162.839 +1571883100 -1216519640 dxs fau wrm 1972-07-13 18:10:55.524622000 1939-03-04 17:50:10.430653000 
16262089050856419829117789349068011.234 +-1385605100 215463808 hsn vhc sey 1994-06-23 02:21:04.013605000 1938-12-29 14:03:44.995783000 20540606947291199624552605501105502.746 +-1304994590 215070586 rdx grm grm 1949-06-12 14:18:59.024668000 1963-05-31 09:22:00.597388000 17761387804699311763663884934941685.765 +140358406 1646857428 ytf bvh sny 1948-03-05 18:12:12.005025000 1987-11-29 12:21:52.464881000 -12089582324673927367349170960766492.272 +1477771968 -1216257492 toj idx fau 1933-08-02 02:23:55.887161000 1987-09-25 08:35:27.030011000 -7811064428239147571914354808729000.76 +1558382478 -1216650714 eyt sni sni 1998-03-09 08:50:41.058896000 1939-01-24 15:34:19.169731000 -10579898897889430003846751066703502.668 +-1291362894 215332734 ugb wid oau 1960-05-13 00:22:55.184214000 1963-07-22 12:23:08.945284000 -19163221094704015168398893997926167.49 +46247274 214677364 pkv eyk bvh 1945-10-09 08:34:39.066350000 1963-05-18 08:36:43.510414000 -14884703198269235372964077845888675.978 +126857784 1646726354 aug oju oju 1937-04-18 08:53:32.932452000 1987-10-21 10:06:01.203959000 3619302323815601316485085492038536.543 +1572014174 -1216388566 hcw jey bvq 1972-07-26 18:56:12.611596000 1939-03-17 18:35:27.517627000 16303628059559670118393855525301384.938 +-1385474026 -1217043936 lwr alg gbv 1994-07-06 03:06:21.100579000 1939-01-11 14:49:02.082757000 20582145955994449913828671677338876.45 +-1304863516 215201660 vhc kvq kvq 1949-06-25 15:04:16.111642000 1963-06-13 10:07:17.684362000 17802926813402562052939951111175059.469 +140489480 1646988502 dxj fal wrd 1948-03-18 18:57:29.091999000 1987-12-12 13:07:09.551855000 -12048043315970677078073104784533118.568 +1477903042 1646333132 xsn mhc cwi 1933-08-15 03:09:12.974135000 1987-10-08 09:20:44.116985000 -7769525419535897282638288632495627.056 +1558513552 -1216519640 idx wrm wrm 1998-03-22 09:35:58.145870000 1939-02-06 16:19:36.256705000 -10538359889186179714570684890470128.964 +-1291231820 215463808 ykf bmh sey 1960-05-26 01:08:12.271188000 1938-12-29 14:03:44.995783000 -19121682086000764879122827821692793.786 +46378348 214808438 toa ido xje 1945-10-22 09:19:56.153324000 1963-05-31 09:22:00.597388000 -14843164189565985083688011669655302.274 +126988858 1646857428 eyk sny sny 1937-05-01 09:38:50.019426000 1987-11-03 10:51:18.290933000 3660841332518851605761151668271910.247 +1572145248 -1216257492 lgb nid fau 1972-08-08 19:41:29.698570000 1987-09-25 08:35:27.030011000 16345167068262920407669921701534758.642 +-1385342952 -1216912862 pbv epk kfa 1994-07-19 03:51:38.187553000 1939-01-24 15:34:19.169731000 20623684964697700203104737853572250.154 +-1304732442 215332734 alg oau oau 1949-07-08 15:49:33.198616000 1963-06-26 10:52:34.771336000 17844465822105812342216017287408433.173 +140620554 214677364 hcn jep bvh 1948-03-31 19:42:46.178973000 1963-05-18 08:36:43.510414000 -12006504307267426788797038608299744.864 +1478034116 1646464206 cwr qlg gbm 1933-08-28 03:54:30.061109000 1987-10-21 10:06:01.203959000 -7727986410832646993362222456262253.352 +1558644626 -1216388566 mhc bvq bvq 1998-04-04 10:21:15.232844000 1939-02-19 17:04:53.343679000 -10496820880482929425294618714236755.26 +-1291100746 -1217043936 doj fql gbv 1960-06-08 01:53:29.358162000 1939-01-11 14:49:02.082757000 -19080143077297514589846761645459420.082 +46509422 214939512 xse mhs cni 1945-11-04 10:05:13.240298000 1963-06-13 10:07:17.684362000 -14801625180862734794411945493421928.57 +127119932 1646988502 ido wrd wrd 1937-05-14 10:24:07.106400000 1987-11-16 11:36:35.377907000 3702380341222101895037217844505283.951 
+1572276322 1646333132 pkf rmh cwi 1972-08-21 20:26:46.785544000 1987-10-08 09:20:44.116985000 16386706076966170696945987877768132.346 +-1385211878 -1216781788 tfa ito oje 1994-08-01 04:36:55.274527000 1939-02-06 16:19:36.256705000 20665223973400950492380804029805623.858 +-1304601368 215463808 epk sey sey 1949-07-21 16:34:50.285590000 1963-07-09 11:37:51.858310000 17886004830809062631492083463641806.877 +140751628 214808438 lgr nit xje 1948-04-13 20:28:03.265947000 1963-05-31 09:22:00.597388000 -11964965298564176499520972432066371.16 +1478165190 1646595280 gbv upk kfq 1933-09-10 04:39:47.148083000 1987-11-03 10:51:18.290933000 -7686447402129396704086156280028879.648 +1558775700 -1216257492 qlg fau fau 1998-04-17 11:06:32.319818000 1939-03-04 17:50:10.430653000 -10455281871779679136018552538003381.556 +-1290969672 -1216912862 hsn jup kfa 1960-06-21 02:38:46.445136000 1939-01-24 15:34:19.169731000 -19038604068594264300570695469226046.378 +46640496 215070586 cwi qlw grm 1945-11-17 10:50:30.327272000 1963-06-26 10:52:34.771336000 -14760086172159484505135879317188554.866 +127251006 214677364 mhs bvh bvh 1937-05-27 11:09:24.193374000 1987-11-29 12:21:52.464881000 3743919349925352184313284020738657.655 +1572407396 1646464206 toj vql gbm 1972-09-03 21:12:03.872518000 1987-10-21 10:06:01.203959000 16428245085669420986222054054001506.05 +-1385080804 -1216650714 xje mxs sni 1994-08-14 05:22:12.361501000 1939-02-19 17:04:53.343679000 20706762982104200781656870206038997.562 +-1304470294 -1217043936 ito wid \N 1949-08-03 17:20:07.372564000 1963-07-22 12:23:08.945284000 17927543839512312920768149639875180.581 +140882702 214939512 pkv rmx gbv 1948-04-26 21:13:20.352921000 1963-06-13 10:07:17.684362000 -11923426289860926210244906255832997.456 +1478296264 1646726354 kfa yto cni 1933-09-23 05:25:04.235057000 1987-11-16 11:36:35.377907000 -7644908393426146414810090103795505.944 +1558906774 1646333132 upk jey oju 1998-04-30 11:51:49.406792000 1939-03-17 18:35:27.517627000 -10413742863076428846742486361770007.852 +-1290838598 -1216781788 lwr nyt cwi 1960-07-04 03:24:03.532110000 1939-02-06 16:19:36.256705000 -18997065059891014011294629292992672.674 +46771570 215201660 gbm upb oje 1945-11-30 11:35:47.414246000 1963-07-09 11:37:51.858310000 -14718547163456234215859813140955181.162 +127382080 214808438 qlw fal kvq 1937-06-09 11:54:41.280348000 1987-12-12 13:07:09.551855000 3785458358628602473589350196972031.359 +1572538470 1646595280 xsn aup xje 1972-09-16 21:57:20.959492000 1987-11-03 10:51:18.290933000 16469784094372671275498120230234879.754 +-1384949730 -1216519640 cni qcw kfq 1994-08-27 06:07:29.448475000 1939-03-04 17:50:10.430653000 20748301990807451070932936382272371.266 +-1304339220 -1216912862 mxs bmh wrm 1949-08-16 18:05:24.459538000 1938-12-29 14:03:44.995783000 17969082848215563210044215816108554.285 +141013776 215070586 toa vqc kfa 1948-05-09 21:58:37.439895000 1963-06-26 10:52:34.771336000 -11881887281157675920968840079599623.752 +1478427338 1646857428 oje dxs grm 1933-10-06 06:10:21.322031000 1987-11-29 12:21:52.464881000 -7603369384722896125534023927562132.24 +1559037848 1646464206 yto nid sny 1998-05-13 12:37:06.493766000 1987-09-25 08:35:27.030011000 -10372203854373178557466420185536634.148 +-1290707524 -1216650714 pbv rdx gbm 1960-07-17 04:09:20.619084000 1939-02-19 17:04:53.343679000 -18955526051187763722018563116759298.97 +46902644 215332734 kfq ytf sni 1945-12-13 12:21:04.501220000 1963-07-22 12:23:08.945284000 -14677008154752983926583746964721807.458 +127513154 214939512 upb jep oau 
1937-06-22 12:39:58.367322000 1963-05-18 08:36:43.510414000 3826997367331852762865416373205405.063 +1572669544 1646726354 cwr eyt cni 1972-09-29 22:42:38.046466000 1987-11-16 11:36:35.377907000 16511323103075921564774186406468253.458 +-1384818656 -1216388566 grm ugb oju 1994-09-09 06:52:46.535449000 1939-03-17 18:35:27.517627000 20789840999510701360209002558505744.97 +-1304208146 -1216781788 qcw fql \N 1949-08-29 18:50:41.546512000 1939-01-11 14:49:02.082757000 18010621856918813499320281992341927.989 +141144850 215201660 xse aug bvq 1948-05-22 22:43:54.526869000 1963-07-09 11:37:51.858310000 -11840348272454425631692773903366250.048 +1478558412 1646988502 sni hcw oje 1933-10-19 06:55:38.409005000 1987-12-12 13:07:09.551855000 -7561830376019645836257957751328758.536 +1559168922 1646595280 dxs rmh kvq 1998-05-26 13:22:23.580740000 1987-10-08 09:20:44.116985000 -10330664845669928268190354009303260.444 +-1290576450 -1216519640 tfa vhc wrd 1960-07-30 04:54:37.706058000 1939-03-04 17:50:10.430653000 \N +47033718 215463808 oju dxj kfq 1945-12-26 13:06:21.588194000 1938-12-29 14:03:44.995783000 -18913987042484513432742496940525925.266 +127644228 215070586 ytf nit wrm 1937-07-05 13:25:15.454296000 1963-05-31 09:22:00.597388000 -14635469146049733637307680788488433.754 +1572800618 1646857428 gbv idx sey 1972-10-12 23:27:55.133440000 1987-11-29 12:21:52.464881000 3868536376035103052141482549438778.767 +-1384687582 -1216257492 kvq ykf grm 1994-09-22 07:38:03.622423000 1987-09-25 08:35:27.030011000 16552862111779171854050252582701627.162 +-1304077072 -1216650714 ugb jup sny 1949-09-11 19:35:58.633486000 1939-01-24 15:34:19.169731000 20831380008213951649485068734739118.674 +141275924 215332734 cwi eyk fau 1948-06-04 23:29:11.613843000 1963-07-22 12:23:08.945284000 18052160865622063788596348168575301.693 +1478689486 214677364 wrm lgb sni 1933-11-01 07:40:55.495979000 1963-05-18 08:36:43.510414000 -11798809263751175342416707727132876.344 +1559299996 1646726354 hcw vql oau 1998-06-08 14:07:40.667714000 1987-10-21 10:06:01.203959000 -7520291367316395546981891575095384.832 +-1290445376 -1216388566 xje alg bvh 1960-08-12 05:39:54.793032000 1939-03-17 18:35:27.517627000 -10289125836966677978914287833069886.74 +47164792 -1217043936 sny hcn oju 1946-01-08 13:51:38.675168000 1939-01-11 14:49:02.082757000 -18872448033781263143466430764292551.562 +127775302 215201660 dxj rmx bvq 1937-07-18 14:10:32.541270000 1963-06-13 10:07:17.684362000 -14593930137346483348031614612255060.05 +1572931692 1646988502 kfa mhc gbv 1972-10-26 00:13:12.220414000 1987-12-12 13:07:09.551855000 3910075384738353341417548725672152.471 +-1384556508 1646333132 oau doj kvq 1994-10-05 08:23:20.709397000 1987-10-08 09:20:44.116985000 16594401120482422143326318758935000.866 +-1303945998 -1216519640 ykf nyt wrd 1949-09-24 20:21:15.720460000 1939-02-06 16:19:36.256705000 20872919016917201938761134910972492.378 +141406998 215463808 gbm ido cwi 1948-06-18 00:14:28.700817000 1938-12-29 14:03:44.995783000 18093699874325314077872414344808675.397 +1478820560 214808438 bvq pkf wrm 1933-11-14 08:26:12.582953000 1963-05-31 09:22:00.597388000 -11757270255047925053140641550899502.64 +1559431070 1646857428 lgb aup sey 1998-06-21 14:52:57.754688000 1987-11-03 10:51:18.290933000 -7478752358613145257705825398862011.128 +-1290314302 -1216257492 cni epk xje 1960-08-25 06:25:11.880006000 1987-09-25 08:35:27.030011000 -10247586828263427689638221656836513.036 +47295866 -1216912862 wrd lgr sny 1946-01-21 14:36:55.762142000 1939-01-24 15:34:19.169731000 
-18830909025078012854190364588059177.858 +127906376 215332734 hcn vqc fau 1937-07-31 14:55:49.628244000 1963-06-26 10:52:34.771336000 -14552391128643233058755548436021686.346 +1573062766 214677364 oje qlg kfa 1972-11-08 00:58:29.307388000 1963-05-18 08:36:43.510414000 3951614393441603630693614901905526.175 +-1384425434 1646464206 sey hsn oau 1994-10-18 09:08:37.796371000 1987-10-21 10:06:01.203959000 16635940129185672432602384935168374.57 +-1303814924 -1216388566 doj rdx bvh 1949-10-07 21:06:32.807434000 1939-02-19 17:04:53.343679000 20914458025620452228037201087205866.082 +141538072 -1217043936 kfq mhs gbm 1948-07-01 00:59:45.787790000 1939-01-11 14:49:02.082757000 18135238883028564367148480521042049.101 +1478951634 214939512 fau toj bvq 1933-11-27 09:11:29.669926000 1963-06-13 10:07:17.684362000 -11715731246344674763864575374666128.936 +1559562144 1646988502 pkf eyt gbv 1998-07-04 15:38:14.841662000 1987-11-16 11:36:35.377907000 -7437213349909894968429759222628637.424 +-1290183228 1646333132 grm ito cni 1960-09-07 07:10:28.966980000 \N -10206047819560177400362155480603139.332 +47426940 -1216781788 bvh pkv wrd 1946-02-03 15:22:12.849116000 1987-10-08 09:20:44.116985000 -18789370016374762564914298411825804.154 +128037450 215463808 lgr aug cwi 1937-08-13 15:41:06.715218000 1939-02-06 16:19:36.256705000 -14510852119939982769479482259788312.642 +1573193840 214808438 sni upk oje 1972-11-21 01:43:46.394362000 1963-07-09 11:37:51.858310000 3993153402144853919969681078138899.879 +-1384294360 1646595280 wid lwr sey 1994-10-31 09:53:54.883345000 1963-05-31 09:22:00.597388000 16677479137888922721878451111401748.274 +-1303683850 -1216257492 hsn vhc xje 1949-10-20 21:51:49.894407000 1987-11-03 10:51:18.290933000 20955997034323702517313267263439239.786 +141669146 -1216912862 oju qlw kfq 1948-07-14 01:45:02.874764000 1939-03-04 17:50:10.430653000 18176777891731814656424546697275422.805 +1479082708 215070586 jey xsn fau 1933-12-10 09:56:46.756900000 1939-01-24 15:34:19.169731000 -11674192237641424474588509198432755.232 +1559693218 214677364 toj idx kfa 1998-07-17 16:23:31.928636000 1963-06-26 10:52:34.771336000 -7395674341206644679153693046395263.72 +-1290052154 1646464206 kvq mxs grm 1960-09-20 07:55:46.053954000 1987-11-29 12:21:52.464881000 -10164508810856927111086089304369765.628 +47558014 -1216650714 fal toa bvh 1946-02-16 16:07:29.936090000 1987-10-21 10:06:01.203959000 -18747831007671512275638232235592430.45 +128168524 -1217043936 pkv eyk gbm 1937-08-26 16:26:23.802192000 1939-02-19 17:04:53.343679000 -14469313111236732480203416083554938.938 +1573324914 214939512 wrm yto sni 1972-12-04 02:29:03.481336000 1963-07-22 12:23:08.945284000 4034692410848104209245747254372273.583 +-1384163286 1646726354 bmh pbv gbv 1994-11-13 10:39:11.970319000 1963-06-13 10:07:17.684362000 16719018146592173011154517287635121.978 +-1303552776 1646333132 lwr alg cni 1949-11-02 22:37:06.981381000 1987-11-16 11:36:35.377907000 20997536043026952806589333439672613.49 +141800220 -1216781788 sny upb oju 1948-07-27 02:30:19.961738000 1939-03-17 18:35:27.517627000 18218316900435064945700612873508796.509 +1479213782 215201660 nid cwr cwi 1933-12-23 10:42:03.843874000 1939-02-06 16:19:36.256705000 -11632653228938174185312443022199381.528 +1559824292 214808438 xsn mhc oje 1998-07-30 17:08:49.015610000 1963-07-09 11:37:51.858310000 -7354135332503394389877626870161890.016 +-1289921080 1646595280 oau qcw kvq 1960-10-03 08:41:03.140928000 1987-12-12 13:07:09.551855000 -10122969802153676821810023128136391.924 +47689088 -1216519640 jep xse 
xje 1946-03-01 16:52:47.023064000 1987-11-03 10:51:18.290933000 -18706291998968261986362166059359056.746 +128299598 -1216912862 toa ido kfq 1937-09-08 17:11:40.889166000 1939-03-04 17:50:10.430653000 -14427774102533482190927349907321565.234 +1573455988 215070586 bvq dxs wrm 1972-12-17 03:14:20.568310000 1938-12-29 14:03:44.995783000 4076231419551354498521813430605647.287 +-1384032212 1646857428 fql tfa kfa 1994-11-26 11:24:29.057293000 1963-06-26 10:52:34.771336000 16760557155295423300430583463868495.682 +-1303421702 1646464206 pbv epk grm 1949-11-15 23:22:24.068355000 1987-11-29 12:21:52.464881000 21039075051730203095865399615905987.194 +141931294 -1216650714 wrd ytf sny 1948-08-09 03:15:37.048712000 1987-09-25 08:35:27.030011000 18259855909138315234976679049742170.213 +1479344856 215332734 rmh gbv gbm 1934-01-05 11:27:20.930848000 1939-02-19 17:04:53.343679000 -11591114220234923896036376845966007.824 +1559955366 214939512 cwr qlg sni 1998-08-12 17:54:06.102584000 1963-07-22 12:23:08.945284000 -7312596323800144100601560693928516.312 +-1289790006 1646726354 sey ugb oau 1960-10-16 09:26:20.227902000 1963-05-18 08:36:43.510414000 -10081430793450426532533956951903018.22 +47820162 -1216388566 nit cwi cni 1946-03-14 17:38:04.110038000 1987-11-16 11:36:35.377907000 -18664752990265011697086099883125683.042 +128430672 -1216781788 xse mhs oju 1937-09-21 17:56:57.976140000 1939-03-17 18:35:27.517627000 -14386235093830231901651283731088191.53 +1573587062 215201660 fau hcw bvq 1972-12-30 03:59:37.655284000 1939-01-11 14:49:02.082757000 4117770428254604787797879606839020.991 +-1383901138 1646988502 jup xje oje 1994-12-09 12:09:46.144266000 1963-07-09 11:37:51.858310000 16802096163998673589706649640101869.386 +-1303290628 1646595280 tfa ito kvq 1949-11-29 00:07:41.155329000 1987-12-12 13:07:09.551855000 21080614060433453385141465792139360.898 +142062368 \N bvh dxj wrd 1948-08-22 04:00:54.135686000 1987-10-08 09:20:44.116985000 18301394917841565524252745225975543.917 +1479475930 -1216519640 vql kfa kfq 1934-01-18 12:12:38.017822000 1939-03-04 17:50:10.430653000 -11549575211531673606760310669732634.12 +1560086440 215463808 gbv upk wrm 1998-08-25 18:39:23.189558000 1938-12-29 14:03:44.995783000 -7271057315096893811325494517695142.608 +-1289658932 215070586 wid ykf sey 1960-10-29 10:11:37.314876000 1963-05-31 09:22:00.597388000 -10039891784747176243257890775669644.516 +47951236 1646857428 rmx gbm grm 1946-03-27 18:23:21.197012000 1987-11-29 12:21:52.464881000 -18623213981561761407810033706892309.338 +128561746 -1216257492 cwi qlw sny 1937-10-04 18:42:15.063114000 1987-09-25 08:35:27.030011000 -14344696085126981612375217554854817.826 +1573718136 -1216650714 jey lgb fau 1973-01-12 04:44:54.742258000 1939-01-24 15:34:19.169731000 4159309436957855077073945783072394.695 +-1383770064 215332734 nyt cni sni 1994-12-22 12:55:03.231240000 1963-07-22 12:23:08.945284000 16843635172701923878982715816335243.09 +-1303159554 214677364 xje mxs oau 1949-12-12 00:52:58.242303000 1963-05-18 08:36:43.510414000 21122153069136703674417531968372734.602 +142193442 1646726354 fal hcn bvh 1948-09-04 04:46:11.222660000 1987-10-21 10:06:01.203959000 18342933926544815813528811402208917.621 +1479607004 -1216388566 aup oje oju 1934-01-31 12:57:55.104796000 1939-03-17 18:35:27.517627000 -11508036202828423317484244493499260.416 +1560217514 -1217043936 kfa yto bvq 1998-09-07 19:24:40.276532000 1939-01-11 14:49:02.082757000 -7229518306393643522049428341461768.904 +-1289527858 215201660 bmh doj gbv 1960-11-11 10:56:54.401850000 1963-06-13 
10:07:17.684362000 -9998352776043925953981824599436270.812 +48082310 1646988502 vqc kfq kvq 1946-04-09 19:08:38.283986000 1987-12-12 13:07:09.551855000 -18581674972858511118533967530658935.634 +128692820 1646333132 gbm upb wrd 1937-10-17 19:27:32.150088000 1987-10-08 09:20:44.116985000 -14303157076423731323099151378621444.122 +1573849210 -1216519640 nid pkf cwi 1973-01-25 05:30:11.829231000 1939-02-06 16:19:36.256705000 4200848445661105366350011959305768.399 +-1383638990 215463808 rdx grm wrm 1995-01-04 13:40:20.318214000 1938-12-29 14:03:44.995783000 16885174181405174168258781992568616.794 +-1303028480 214808438 cni qcw sey 1949-12-25 01:38:15.329277000 1963-05-31 09:22:00.597388000 21163692077839953963693598144606108.306 +142324516 1646857428 jep lgr xje 1948-09-17 05:31:28.309634000 1987-11-03 10:51:18.290933000 18384472935248066102804877578442291.325 +1479738078 -1216257492 eyt sni sny 1934-02-13 13:43:12.191770000 1987-09-25 08:35:27.030011000 -11466497194125173028208178317265886.712 +1560348588 -1216912862 oje dxs fau 1998-09-20 20:09:57.363506000 1939-01-24 15:34:19.169731000 -7187979297690393232773362165228395.2 +-1289396784 215332734 fql hsn kfa 1960-11-24 11:42:11.488824000 1963-06-26 10:52:34.771336000 -9956813767340675664705758423202897.108 +48213384 214677364 aug oju oau 1946-04-22 19:53:55.370960000 1963-05-18 08:36:43.510414000 -18540135964155260829257901354425561.93 +128823894 1646464206 kfq ytf bvh 1937-10-30 20:12:49.237062000 1987-10-21 10:06:01.203959000 -14261618067720481033823085202388070.418 +1573980284 -1216388566 rmh toj gbm 1973-02-07 06:15:28.916205000 1939-02-19 17:04:53.343679000 4242387454364355655626078135539142.103 +-1383507916 -1217043936 vhc kvq bvq 1995-01-17 14:25:37.405188000 1939-01-11 14:49:02.082757000 16926713190108424457534848168801990.498 +-1302897406 214939512 grm ugb gbv 1950-01-07 02:23:32.416251000 1963-06-13 10:07:17.684362000 21205231086543204252969664320839482.01 +142455590 1646988502 nit pkv cni 1948-09-30 06:16:45.396608000 1987-11-16 11:36:35.377907000 18426011943951316392080943754675665.029 +1479869152 1646333132 idx wrm wrd 1934-02-26 14:28:29.278744000 1987-10-08 09:20:44.116985000 -11424958185421922738932112141032513.008 +1560479662 -1216781788 sni hcw cwi 1998-10-03 20:55:14.450480000 1939-02-06 16:19:36.256705000 -7146440288987142943497295988995021.496 +-1289265710 215463808 jup lwr oje 1960-12-07 12:27:28.575798000 1963-07-09 11:37:51.858310000 -9915274758637425375429692246969523.404 +48344458 214808438 eyk sny sey 1946-05-05 20:39:12.457934000 1963-05-31 09:22:00.597388000 -18498596955452010539981835178192188.226 +128954968 1646595280 \N dxj xje 1937-11-12 20:58:06.324036000 1987-11-03 10:51:18.290933000 -14220079059017230744547019026154696.714 +1574111358 -1216257492 oju xsn kfq 1973-02-20 07:00:46.003179000 1939-03-04 17:50:10.430653000 4283926463067605944902144311772515.807 +-1383376842 -1216912862 vql oau fau 1995-01-30 15:10:54.492162000 1939-01-24 15:34:19.169731000 16968252198811674746810914345035364.202 +-1302766332 215070586 alg ykf kfa 1950-01-20 03:08:49.503225000 1963-06-26 10:52:34.771336000 21246770095246454542245730497072855.714 +142586664 214677364 kvq toa grm 1948-10-13 07:02:02.483582000 1987-11-29 12:21:52.464881000 18467550952654566681357009930909038.733 +1480000226 1646464206 rmx bvq bvh 1934-03-11 15:13:46.365718000 1987-10-21 10:06:01.203959000 -11383419176718672449656045964799139.304 +1560610736 -1216650714 mhc lgb gbm 1998-10-16 21:40:31.537454000 1939-02-19 17:04:53.343679000 
-7104901280283892654221229812761647.792 +-1289134636 -1217043936 wrm pbv sni 1960-12-20 13:12:45.662772000 1963-07-22 12:23:08.945284000 -9873735749934175086153626070736149.7 +48475532 214939512 nyt wrd gbv 1946-05-18 21:24:29.544908000 1963-06-13 10:07:17.684362000 -18457057946748760250705769001958814.522 +129086042 1646726354 ido hcn cni 1937-11-25 21:43:23.411010000 1987-11-16 11:36:35.377907000 -14178540050313980455270952849921323.01 +1574242432 1646333132 sny cwr oju 1973-03-05 07:46:03.090153000 1939-03-17 18:35:27.517627000 4325465471770856234178210488005889.511 +-1383245768 -1216781788 aup sey cwi 1995-02-12 15:56:11.579136000 1939-02-06 16:19:36.256705000 17009791207514925036086980521268737.906 +-1302635258 215201660 epk doj oje 1950-02-02 03:54:06.590199000 1963-07-09 11:37:51.858310000 20661171391050865060883708820716.202 +142717738 \N oau xse kvq 1948-10-26 07:47:19.570556000 1987-12-12 13:07:09.551855000 18509089961357816970633076107142412.437 +1480131300 214808438 vqc fau xje 1934-03-24 15:59:03.452692000 1987-11-03 10:51:18.290933000 -11341880168015422160379979788565765.6 +1560741810 1646595280 qlg pkf kfq 1998-10-29 22:25:48.624428000 1939-03-04 17:50:10.430653000 -7063362271580642364945163636528274.088 +-1289003562 -1216519640 bvq tfa wrm 1961-01-02 13:58:02.749746000 1938-12-29 14:03:44.995783000 -9832196741230924796877559894502775.996 +48606606 -1216912862 rdx bvh kfa 1946-05-31 22:09:46.631882000 1963-06-26 10:52:34.771336000 -18415518938045509961429702825725440.818 +129217116 215070586 mhs lgr grm 1937-12-08 22:28:40.497983000 1987-11-29 12:21:52.464881000 -14137001041610730165994886673687949.306 +1574373506 1646857428 wrd gbv sny 1973-03-18 08:31:20.177127000 1987-09-25 08:35:27.030011000 4367004480474106523454276664239263.215 +-1383114694 1646464206 eyt wid gbm 1995-02-25 16:41:28.666110000 1939-02-19 17:04:53.343679000 17051330216218175325363046697502111.61 +-1302504184 -1216650714 ito hsn sni 1950-02-15 04:39:23.677173000 1963-07-22 12:23:08.945284000 62200180094301154336949885054089.906 +142848812 215332734 sey cwi oau 1948-11-08 08:32:36.657530000 1963-05-18 08:36:43.510414000 18550628970061067259909142283375786.141 +1480262374 214939512 aug jey cni \N 1987-11-16 11:36:35.377907000 -11300341159312171871103913612332391.896 +1560872884 1646726354 upk toj oju 1934-04-06 16:44:20.539666000 1939-03-17 18:35:27.517627000 -7021823262877392075669097460294900.384 +-1288872488 -1216388566 fau xje bvq 1998-11-11 23:11:05.711402000 1939-01-11 14:49:02.082757000 -9790657732527674507601493718269402.292 +48737680 -1216781788 vhc fal oje 1961-01-15 14:43:19.836720000 1963-07-09 11:37:51.858310000 -18373979929342259672153636649492067.114 +129348190 215201660 qlw pkv kvq 1946-06-13 22:55:03.718856000 1987-12-12 13:07:09.551855000 -14095462032907479876718820497454575.602 +1574504580 1646988502 bvh kfa wrd 1937-12-21 23:13:57.584957000 1987-10-08 09:20:44.116985000 4408543489177356812730342840472636.919 +-1382983620 1646595280 idx bmh kfq 1973-03-31 09:16:37.264101000 1939-03-04 17:50:10.430653000 17092869224921425614639112873735485.314 +-1302373110 -1216519640 mxs lwr wrm 1995-03-10 17:26:45.753084000 1938-12-29 14:03:44.995783000 103739188797551443613016061287463.61 +142979886 215463808 wid gbm sey 1950-02-28 05:24:40.764147000 1963-05-31 09:22:00.597388000 18592167978764317549185208459609159.845 +1480393448 215070586 eyk nid grm 1948-11-21 09:17:53.744504000 1987-11-29 12:21:52.464881000 -11258802150608921581827847436099018.192 +1561003958 1646857428 yto xsn sny 1934-04-19 
17:29:37.626640000 1987-09-25 08:35:27.030011000 -6980284254174141786393031284061526.68 +-1288741414 -1216257492 jey cni fau 1998-11-24 23:56:22.798376000 1939-01-24 15:34:19.169731000 -9749118723824424218325427542036028.588 +48868754 -1216650714 alg jep sni 1961-01-28 15:28:36.923694000 1963-07-22 12:23:08.945284000 -18332440920639009382877570473258693.41 +129479264 215332734 upb toa oau 1946-06-26 23:40:20.805830000 1963-05-18 08:36:43.510414000 -14053923024204229587442754321221201.898 +1574635654 214677364 fal oje bvh 1938-01-03 23:59:14.671931000 1987-10-21 10:06:01.203959000 4450082497880607102006409016706010.623 +-1382852546 1646726354 mhc fql oju 1973-04-13 10:01:54.351075000 1939-03-17 18:35:27.517627000 17134408233624675903915179049968859.018 +-1302242036 -1216388566 qcw pbv bvq 1995-03-23 18:12:02.840058000 1939-01-11 14:49:02.082757000 145278197500801732889082237520837.314 +143110960 -1217043936 bmh kfq gbv 1950-03-13 06:09:57.851121000 1963-06-13 10:07:17.684362000 18633706987467567838461274635842533.549 +1480524522 215201660 ido rmh kvq 1948-12-04 10:03:10.831478000 1987-12-12 13:07:09.551855000 -11217263141905671292551781259865644.488 diff --git a/tests/queries/0_stateless/02998_native_parquet_reader.sh b/tests/queries/0_stateless/02998_native_parquet_reader.sh new file mode 100755 index 00000000000..d6369c4921b --- /dev/null +++ b/tests/queries/0_stateless/02998_native_parquet_reader.sh @@ -0,0 +1,211 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +PAR_PATH="$CURDIR"/data_parquet/native_parquet_reader.parquet +# the content of parquet file can be generated by following codes +# < +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# +# namespace +# { +# +# using namespace DB; +# +# const UInt32 ROW_NUM = 2000; +# const UInt32 MIN_STRING_LEN = 3; +# const UInt32 MAX_STRING_LEN = 5; +# +# const UInt32 PLAIN_ENCODING_CARDINALITY = ROW_NUM * 2; +# const UInt32 MIX_ENCODING_CARDINALITY = 800; +# const UInt32 DICT_ENCODING_CARDINALITY = 20; +# +# UInt16 nextNum() +# { +# static UInt16 idx = 0; +# static UInt16 nums[] = {0, 21845, 43690}; +# static size_t nums_len = sizeof(nums) / sizeof(nums[0]); +# return nums[(idx++) % nums_len]++; +# } +# +# template +# void generateValues(MutableColumnPtr & col, size_t num) +# { +# using FieldType = typename NumericDataType::FieldType; +# +# const size_t next_num_bytes = sizeof(nextNum()); +# char bytewise_val[sizeof(FieldType)]; +# +# while (col->size() < num) +# { +# for (auto bytes = 0; bytes < sizeof(FieldType); bytes += next_num_bytes) +# { +# auto tmp = nextNum(); +# memcpy(bytewise_val + bytes, &tmp, std::min(next_num_bytes, sizeof(FieldType) - bytes)); +# } +# if (is_decimal) +# { +# // clean highest 3 bits, make sure the result doest not exceed the limits of the decimal type +# if (bytewise_val[sizeof(FieldType) - 1] > 0) +# bytewise_val[sizeof(FieldType) - 1] &= 0x0f; +# else +# bytewise_val[sizeof(FieldType) - 1] |= 0xf0; +# } +# FieldType val; +# memcpy(&val, &bytewise_val, sizeof(FieldType)); +# col->insert(val); +# } +# } +# +# template <> +# void generateValues(MutableColumnPtr & col, size_t num) +# { +# std::string str; +# while (col->size() < num) +# { +# auto len = MIN_STRING_LEN + nextNum() % (MAX_STRING_LEN - MIN_STRING_LEN); +# str.clear(); +# for (size_t i = 0; i < len; i++) +# { +# str.push_back('a' + nextNum() % 
('z' - 'a')); +# } +# col->insert(str); +# } +# } +# +# template +# ColumnWithTypeAndName generateColumn( +# std::shared_ptr ch_type, +# size_t cardinality, +# const std::string & col_name, +# const std::set & null_indice) +# { +# DataTypePtr col_type = ch_type; +# if (!null_indice.empty()) +# { +# col_type = std::make_shared(ch_type); +# } +# +# auto values = ch_type->createColumn(); +# values->reserve(cardinality); +# generateValues(values, cardinality); +# +# auto col = col_type->createColumn(); +# col->reserve(ROW_NUM); +# for (size_t i = 0; i < ROW_NUM; i++) +# { +# if (!null_indice.empty() && null_indice.contains(i)) +# { +# col->insert(Null()); +# } +# else +# { +# col->insert(values->operator[](nextNum() % cardinality)); +# } +# } +# return {std::move(col), col_type, col_name}; +# } +# +# Block generateBlock() +# { +# ColumnsWithTypeAndName cols; +# +# // test Int32 type +# std::set null_indice{512, 1001, 211, 392, 553, 1725}; +# // Nullability is expressed by the definition level and encoded as bit-packed groups with a smallest group size of 8 +# // when null values appear. Here we make a big bit-packed group with more than 1000 values. +# for (size_t i = 0; i < 170; i++) +# { +# null_indice.emplace(622 + i * 6); +# } +# cols.emplace_back(generateColumn( +# std::make_shared(), PLAIN_ENCODING_CARDINALITY, "plain_encoding_i32", null_indice)); +# null_indice = {917, 482, 283, 580, 1926, 1667, 1971}; +# cols.emplace_back(generateColumn( +# std::make_shared(), DICT_ENCODING_CARDINALITY, "dict_encoding_i32", null_indice)); +# +# // test string type +# null_indice = {818, 928, 1958, 1141, 1553, 1407, 690, 1769}; +# cols.emplace_back(generateColumn( +# std::make_shared(), PLAIN_ENCODING_CARDINALITY, "plain_encoding_str", null_indice)); +# null_indice = {1441, 1747, 216, 1209, 89, 52, 536, 625}; +# cols.emplace_back(generateColumn( +# std::make_shared(), MIX_ENCODING_CARDINALITY, "mix_encoding_str", null_indice)); +# null_indice = {1478, 1862, 894, 1314, 1844, 243, 869, 551}; +# cols.emplace_back(generateColumn( +# std::make_shared(), DICT_ENCODING_CARDINALITY, "dict_encoding_str", null_indice)); +# +# // test DateTime64 type +# auto dt_type = std::make_shared(ParquetRecordReader::default_datetime64_scale); +# null_indice = {1078, 112, 1981, 795, 371, 1176, 1526, 11}; +# cols.emplace_back(generateColumn(dt_type, PLAIN_ENCODING_CARDINALITY, "plain_encoding_dt64", null_indice)); +# null_indice = {1734, 1153, 1893, 1205, 644, 1670, 1482, 1479}; +# cols.emplace_back(generateColumn(dt_type, DICT_ENCODING_CARDINALITY, "dict_encoding_dt64", null_indice)); +# +# // test Decimal128 type +# auto d128_type = std::make_shared(DecimalUtils::max_precision, 3); +# null_indice = {852, 1448, 1569, 896, 1866, 1655, 100, 418}; +# cols.emplace_back(generateColumn(d128_type, PLAIN_ENCODING_CARDINALITY, "plain_encoding_decimal128", null_indice)); +# +# return {cols}; +# } +# +# void dumpBlock(const Block & block) +# { +# WriteBufferFromFile output_buf("/tmp/ut-out.csv"); +# auto out = getContext().context->getOutputFormat("CSVWithNames", output_buf, block); +# out->write(block); +# out->finalize(); +# std::cerr << block.dumpStructure() << std::endl << std::endl; +# } +# +# } +# +# EndOfCodes +# +# How to generate the parquet file: +# 1. Use the C++ code above. +# Put it in src/Common/tests/gtest_main.cpp and add the following two lines to the main function: +# tryRegisterFormats(); +# dumpBlock(generateBlock()); +# 2. Generate /tmp/ut-out.csv.
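+# (Illustration only, not part of the original generator: nextNum() above interleaves three counters starting at 0, 21845 and 43690, so successive calls return 0, 21845, 43690, 1, 21846, 43691, ... and the dumped CSV should be reproducible across runs. A hypothetical standalone check, assuming iostream is included: +#     for (int i = 0; i < 6; ++i) std::cout << nextNum() << ' ';  // prints: 0 21845 43690 1 21846 43691 +# )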
+# After compiled, run any test, such as "./src/unit_tests_dbms --gtest_filter=IColumn.dumpStructure", +# 3. Generate the parquet file by following spark sql +# create temporary view tv using csv options('path' '/tmp/ut-out.csv', 'header' 'true', 'nullValue' '\\N'); +# insert overwrite directory "/tmp/test-parquet" using Parquet +# options('parquet.dictionary.page.size' '500') +# select /*+ COALESCE(1) */ cast(plain_encoding_i32 as int), cast(dict_encoding_i32 as int), +# plain_encoding_str, mix_encoding_str, dict_encoding_str, +# cast(plain_encoding_dt64 as timestamp), cast(dict_encoding_dt64 as timestamp), +# cast(plain_encoding_decimal128 as decimal(38, 3)) +# from tv; +# + +CH_SCHEMA="\ + plain_encoding_i32 Nullable(Int32), \ + dict_encoding_i32 Nullable(Int32), \ + plain_encoding_str Nullable(String), \ + mix_encoding_str Nullable(String), \ + dict_encoding_str LowCardinality(Nullable(String)), \ + plain_encoding_dt64 Nullable(DateTime64(9, \\'UTC\\')), \ + dict_encoding_dt64 Nullable(DateTime64(9, \\'UTC\\')), \ + plain_encoding_decimal128 Nullable(Decimal(38, 3))" +QUERY="SELECT * from file('$PAR_PATH', 'Parquet', '$CH_SCHEMA')" + +# there may be more than on group in parquet files, unstable results may generated by multithreads +$CLICKHOUSE_LOCAL --multiquery --max_threads 1 --max_parsing_threads 1 --input_format_parquet_use_native_reader true --query "$QUERY" diff --git a/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql b/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql index f1727cb9e5c..fee42d1abc6 100644 --- a/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql +++ b/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql @@ -1,6 +1,6 @@ set allow_suspicious_primary_key = 0; -DROP TABLE IF EXISTS data; +drop table if exists data; create table data (key Int, value AggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } create table data (key Int, value SimpleAggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } @@ -12,7 +12,22 @@ create table data (key Int, value AggregateFunction(sum, UInt64)) engine=Aggrega create table data (key Int, value SimpleAggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() primary key value order by (value, key); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } set allow_suspicious_primary_key = 1; - create table data (key Int, value SimpleAggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() primary key value order by (value, key); -DROP TABLE data; +-- ATTACH should work regardless allow_suspicious_primary_key +set allow_suspicious_primary_key = 0; +detach table data; +attach table data; +drop table data; + +-- ALTER AggregatingMergeTree +create table data (key Int) engine=AggregatingMergeTree() order by (key); +alter table data add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } +alter table data add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value) settings allow_suspicious_primary_key=1; +drop table data; + +-- ALTER ReplicatedAggregatingMergeTree +create table data_rep (key Int) engine=ReplicatedAggregatingMergeTree('/tables/{database}', 'r1') order by (key); +alter table data_rep add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value); -- { serverError 
DATA_TYPE_CANNOT_BE_USED_IN_KEY } +alter table data_rep add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value) settings allow_suspicious_primary_key=1; +drop table data_rep; diff --git a/tests/queries/0_stateless/03033_dynamic_text_serialization.reference b/tests/queries/0_stateless/03033_dynamic_text_serialization.reference new file mode 100644 index 00000000000..d965245266c --- /dev/null +++ b/tests/queries/0_stateless/03033_dynamic_text_serialization.reference @@ -0,0 +1,55 @@ +JSON +{"d":"42","dynamicType(d)":"Int64"} +{"d":42.42,"dynamicType(d)":"Float64"} +{"d":"str","dynamicType(d)":"String"} +{"d":["1","2","3"],"dynamicType(d)":"Array(Int64)"} +{"d":"2020-01-01","dynamicType(d)":"Date"} +{"d":"2020-01-01 10:00:00.000000000","dynamicType(d)":"DateTime64(9)"} +{"d":{"a":"42","b":"str"},"dynamicType(d)":"Tuple(a Int64, b String)"} +{"d":{"a":"43"},"dynamicType(d)":"Tuple(a Int64)"} +{"d":{"a":"44","c":["1","2","3"]},"dynamicType(d)":"Tuple(a Int64, c Array(Int64))"} +{"d":["1","str",["1","2","3"]],"dynamicType(d)":"Tuple(Int64, String, Array(Int64))"} +{"d":null,"dynamicType(d)":"None"} +{"d":true,"dynamicType(d)":"Bool"} +{"d":"42","dynamicType(d)":"Int64"} +{"d":"42.42","dynamicType(d)":"String"} +{"d":"str","dynamicType(d)":"String"} +{"d":null,"dynamicType(d)":"None"} +{"d":"1","dynamicType(d)":"Int64"} +CSV +42,"Int64" +42.42,"Float64" +"str","String" +"[1,2,3]","Array(Int64)" +"2020-01-01","Date" +"2020-01-01 10:00:00.000000000","DateTime64(9)" +"[1, 'str', [1, 2, 3]]","String" +\N,"None" +true,"Bool" +TSV +42 Int64 +42.42 Float64 +str String +[1,2,3] Array(Int64) +2020-01-01 Date +2020-01-01 10:00:00.000000000 DateTime64(9) +[1, \'str\', [1, 2, 3]] String +\N None +true Bool +Values +(42,'Int64'),(42.42,'Float64'),('str','String'),([1,2,3],'Array(Int64)'),('2020-01-01','Date'),('2020-01-01 10:00:00.000000000','DateTime64(9)'),(NULL,'None'),(true,'Bool') +Cast using parsing +42 Int64 +42.42 Float64 +[1,2,3] Array(Int64) +2020-01-01 Date +2020-01-01 10:00:00.000000000 DateTime64(9) +\N None +true Bool +42 Int64 +42.42 Float64 +[1, 2, 3] String +2020-01-01 String +2020-01-01 10:00:00 String +\N None +true String diff --git a/tests/queries/0_stateless/03033_dynamic_text_serialization.sql b/tests/queries/0_stateless/03033_dynamic_text_serialization.sql new file mode 100644 index 00000000000..d12d110fe28 --- /dev/null +++ b/tests/queries/0_stateless/03033_dynamic_text_serialization.sql @@ -0,0 +1,74 @@ +set allow_experimental_dynamic_type = 1; + +select 'JSON'; +select d, dynamicType(d) from format(JSONEachRow, 'd Dynamic', $$ +{"d" : 42} +{"d" : 42.42} +{"d" : "str"} +{"d" : [1, 2, 3]} +{"d" : "2020-01-01"} +{"d" : "2020-01-01 10:00:00"} +{"d" : {"a" : 42, "b" : "str"}} +{"d" : {"a" : 43}} +{"d" : {"a" : 44, "c" : [1, 2, 3]}} +{"d" : [1, "str", [1, 2, 3]]} +{"d" : null} +{"d" : true} +$$) format JSONEachRow; + +select d, dynamicType(d) from format(JSONEachRow, 'd Dynamic(max_types=2)', $$ +{"d" : 42} +{"d" : 42.42} +{"d" : "str"} +{"d" : null} +{"d" : true} +$$) format JSONEachRow; + +select 'CSV'; +select d, dynamicType(d) from format(CSV, 'd Dynamic', +$$42 +42.42 +"str" +"[1, 2, 3]" +"2020-01-01" +"2020-01-01 10:00:00" +"[1, 'str', [1, 2, 3]]" +\N +true +$$) format CSV; + +select 'TSV'; +select d, dynamicType(d) from format(TSV, 'd Dynamic', +$$42 +42.42 +str +[1, 2, 3] +2020-01-01 +2020-01-01 10:00:00 +[1, 'str', [1, 2, 3]] +\N +true +$$) format TSV; + +select 'Values'; +select d, dynamicType(d) from format(Values, 'd Dynamic', $$ +(42) +(42.42) 
+('str') +([1, 2, 3]) +('2020-01-01') +('2020-01-01 10:00:00') +(NULL) +(true) +$$) format Values; +select ''; + +select 'Cast using parsing'; +drop table if exists test; +create table test (s String) engine=Memory; +insert into test values ('42'), ('42.42'), ('[1, 2, 3]'), ('2020-01-01'), ('2020-01-01 10:00:00'), ('NULL'), ('true'); +set cast_string_to_dynamic_use_inference=1; +select s::Dynamic as d, dynamicType(d) from test; +select s::Dynamic(max_types=3) as d, dynamicType(d) from test; +drop table test; + diff --git a/tests/queries/0_stateless/03033_final_undefined_last_mark.reference b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference new file mode 100644 index 00000000000..a30b755709b --- /dev/null +++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference @@ -0,0 +1,2 @@ +Disabled 11338881281426660955 14765404159170880511 +Enabled 11338881281426660955 14765404159170880511 diff --git a/tests/queries/0_stateless/03033_final_undefined_last_mark.sql b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql new file mode 100644 index 00000000000..25a30a365a5 --- /dev/null +++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql @@ -0,0 +1,23 @@ +-- Tags: no-random-settings, no-random-merge-tree-settings + +DROP TABLE IF EXISTS account_test; + +CREATE TABLE account_test +( + `id` UInt64, + `row_ver` UInt64, +) +ENGINE = ReplacingMergeTree(row_ver) +ORDER BY id +SETTINGS index_granularity = 16, index_granularity_bytes = 0, + min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, + min_rows_for_compact_part = 0, min_bytes_for_compact_part = 0; + +SYSTEM STOP MERGES account_test; + +INSERT INTO account_test VALUES (11338881281426660955,717769962224129342),(12484100559155738267,7950971667203174918),(7603729260199571867,3255798127676911942),(7023543111808724827,911615979861855126),(10293135086416484571,3264379259750736572),(15561193439904316763,8419819469587131454),(17632407413882870235,7252071832370181502),(17009726455991851227,7525297506591593939),(12392078953873778779,8473049173389293961),(15283366022689446555,11692491360262171467),(9087459014730986523,2783662960221838603),(293823584550906267,4847630088179732782),(15693186194430465755,8163804880526285623),(7353080168325584795,17315892478487497859),(5980311238303466523,6943353798059390089),(14242621660019578011,8684624667957352769),(8241843507567433563,15731952080102886438); +INSERT INTO account_test VALUES (11338881281426660955, 14765404159170880511); + +SELECT 'Disabled', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 0; +SELECT 'Enabled', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 1; + diff --git a/tests/queries/0_stateless/03034_dynamic_conversions.reference b/tests/queries/0_stateless/03034_dynamic_conversions.reference new file mode 100644 index 00000000000..45f94f7ecc4 --- /dev/null +++ b/tests/queries/0_stateless/03034_dynamic_conversions.reference @@ -0,0 +1,88 @@ +0 UInt64 +1 UInt64 +2 UInt64 +0 String +1 String +2 String +0 +1 +2 +0 +1 +2 +1970-01-01 +1970-01-02 +1970-01-03 +0 UInt64 +1 UInt64 +2 UInt64 +0 UInt64 +\N None +2 UInt64 +0 UInt64 +str_1 String +[0,1] Array(UInt64) +\N None +4 UInt64 +str_5 String +0 String +str_1 String +[0,1] String +\N None +4 String +str_5 String +0 UInt64 +str_1 String +[0,1] String +\N None +4 UInt64 +str_5 String +0 UInt64 +str_1 String +[0,1] Array(UInt64) +\N None +4 
UInt64 +str_5 String +0 +1 +2 +0 +1 +2 +0 UInt64 +str_1 String +[0,1] String +\N None +4 UInt64 +str_5 String +0 UInt64 +1970-01-02 Date +[0,1] String +\N None +4 UInt64 +1970-01-06 Date +0 +42 +42.42 +1 +0 +\N +42 +42.42 +1 +0 + +42 +42.42 +true +e10 +\N +42 +42.42 +true +e10 +\N +42 +\N +1 +\N diff --git a/tests/queries/0_stateless/03034_dynamic_conversions.sql b/tests/queries/0_stateless/03034_dynamic_conversions.sql new file mode 100644 index 00000000000..ed75fbf2377 --- /dev/null +++ b/tests/queries/0_stateless/03034_dynamic_conversions.sql @@ -0,0 +1,34 @@ +set allow_experimental_dynamic_type=1; +set allow_experimental_variant_type=1; +set use_variant_as_common_type=1; + +select number::Dynamic as d, dynamicType(d) from numbers(3); +select number::Dynamic(max_types=1) as d, dynamicType(d) from numbers(3); +select number::Dynamic::UInt64 as v from numbers(3); +select number::Dynamic::String as v from numbers(3); +select number::Dynamic::Date as v from numbers(3); +select number::Dynamic::Array(UInt64) as v from numbers(3); -- {serverError TYPE_MISMATCH} +select number::Dynamic::Variant(UInt64, String) as v, variantType(v) from numbers(3); +select (number % 2 ? NULL : number)::Dynamic as d, dynamicType(d) from numbers(3); + +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=1) as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=2) as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=3) as d, dynamicType(d) from numbers(6); + +select number::Dynamic(max_types=2)::Dynamic(max_types=3) as d from numbers(3); +select number::Dynamic(max_types=2)::Dynamic(max_types=1) as d from numbers(3); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=3)::Dynamic(max_types=2) as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, toDate(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=4)::Dynamic(max_types=3) as d, dynamicType(d) from numbers(6); + + +create table test (d Dynamic) engine = Memory; +insert into test values (NULL), (42), ('42.42'), (true), ('e10'); +select d::Float64 from test; +select d::Nullable(Float64) from test; +select d::String from test; +select d::Nullable(String) from test; +select d::UInt64 from test; -- {serverError CANNOT_PARSE_TEXT} +select d::Nullable(UInt64) from test; +select d::Date from test; -- {serverError CANNOT_PARSE_DATE} + diff --git a/tests/queries/0_stateless/03035_dynamic_sorting.reference b/tests/queries/0_stateless/03035_dynamic_sorting.reference new file mode 100644 index 00000000000..9b8df11c7a9 --- /dev/null +++ b/tests/queries/0_stateless/03035_dynamic_sorting.reference @@ -0,0 +1,299 @@ +order by d1 nulls first +\N None +\N None +\N None +\N None +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String 
+abd String +order by d1 nulls last +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +\N None +\N None +\N None +\N None +order by d2 nulls first +\N None +\N None +\N None +\N None +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +order by d2 nulls last +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +\N None +\N None +\N None +\N None +order by d1, d2 nulls first +[1,2,3] \N Array(Int64) None +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +[1,2,3] 42 Array(Int64) Int64 +[1,2,3] abc Array(Int64) String +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 \N Int64 None +42 [1,2,3] Int64 Array(Int64) +42 42 Int64 Int64 +42 43 Int64 Int64 +42 abc Int64 String +43 42 Int64 Int64 +abc \N String None +abc [1,2,3] String Array(Int64) +abc 42 String Int64 +abc abc String String +abc abd String String +abd abc String String +\N \N None None +\N [1,2,3] None Array(Int64) +\N 42 None Int64 +\N abc None String +order by d1, d2 nulls last +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +[1,2,3] 42 Array(Int64) Int64 +[1,2,3] abc Array(Int64) String +[1,2,3] \N Array(Int64) None +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 [1,2,3] Int64 Array(Int64) +42 42 Int64 Int64 +42 43 Int64 Int64 +42 abc Int64 String +42 \N Int64 None +43 42 Int64 Int64 +abc [1,2,3] String Array(Int64) +abc 42 String Int64 +abc abc String String +abc abd String String +abc \N String None +abd abc String String +\N [1,2,3] None Array(Int64) +\N 42 None Int64 +\N abc None String +\N \N None None +order by d2, d1 nulls first +\N [1,2,3] None Array(Int64) +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 [1,2,3] Int64 Array(Int64) +abc [1,2,3] String Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +\N 42 None Int64 +[1,2,3] 42 Array(Int64) Int64 +42 42 Int64 Int64 +43 42 Int64 Int64 +abc 42 String Int64 +42 43 Int64 Int64 +\N abc None String +[1,2,3] abc Array(Int64) String +42 abc Int64 String +abc abc String String +abd abc String String +abc abd String String +\N \N None None +[1,2,3] \N Array(Int64) None +42 \N Int64 None +abc \N String None +order by d2, d1 nulls last +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 [1,2,3] Int64 Array(Int64) +abc [1,2,3] String Array(Int64) +\N [1,2,3] None Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +[1,2,3] 42 Array(Int64) Int64 +42 42 Int64 Int64 +43 42 Int64 Int64 +abc 42 String Int64 +\N 42 None Int64 +42 43 Int64 Int64 +[1,2,3] abc Array(Int64) String +42 abc Int64 String +abc abc String String +abd abc String String +\N abc None String +abc abd String String +[1,2,3] \N Array(Int64) None +42 \N Int64 None +abc \N String None +\N \N None None +d1 = d2 +[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 0 Array(Int64) Array(Int64) +[1,2,3] 42 0 Array(Int64) Int64 +[1,2,3] 
abc 0 Array(Int64) String +[1,2,3] \N 0 Array(Int64) None +[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) +42 [1,2,3] 0 Int64 Array(Int64) +42 42 1 Int64 Int64 +42 43 0 Int64 Int64 +42 abc 0 Int64 String +42 \N 0 Int64 None +43 42 0 Int64 Int64 +abc [1,2,3] 0 String Array(Int64) +abc 42 0 String Int64 +abc abc 1 String String +abc abd 0 String String +abc \N 0 String None +abd abc 0 String String +\N [1,2,3] 0 None Array(Int64) +\N 42 0 None Int64 +\N abc 0 None String +\N \N 1 None None +d1 < d2 +[1,2,3] [1,2,3] 0 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 1 Array(Int64) Array(Int64) +[1,2,3] 42 1 Array(Int64) Int64 +[1,2,3] abc 1 Array(Int64) String +[1,2,3] \N 1 Array(Int64) None +[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) +42 [1,2,3] 0 Int64 Array(Int64) +42 42 0 Int64 Int64 +42 43 1 Int64 Int64 +42 abc 1 Int64 String +42 \N 1 Int64 None +43 42 0 Int64 Int64 +abc [1,2,3] 0 String Array(Int64) +abc 42 0 String Int64 +abc abc 0 String String +abc abd 1 String String +abc \N 1 String None +abd abc 0 String String +\N [1,2,3] 0 None Array(Int64) +\N 42 0 None Int64 +\N abc 0 None String +\N \N 0 None None +d1 <= d2 +[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 1 Array(Int64) Array(Int64) +[1,2,3] 42 1 Array(Int64) Int64 +[1,2,3] abc 1 Array(Int64) String +[1,2,3] \N 1 Array(Int64) None +[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) +42 [1,2,3] 0 Int64 Array(Int64) +42 42 1 Int64 Int64 +42 43 1 Int64 Int64 +42 abc 1 Int64 String +42 \N 1 Int64 None +43 42 0 Int64 Int64 +abc [1,2,3] 0 String Array(Int64) +abc 42 0 String Int64 +abc abc 1 String String +abc abd 1 String String +abc \N 1 String None +abd abc 0 String String +\N [1,2,3] 0 None Array(Int64) +\N 42 0 None Int64 +\N abc 0 None String +\N \N 1 None None +d1 > d2 +[1,2,3] [1,2,3] 0 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 0 Array(Int64) Array(Int64) +[1,2,3] 42 0 Array(Int64) Int64 +[1,2,3] abc 0 Array(Int64) String +[1,2,3] \N 0 Array(Int64) None +[1,2,4] [1,2,3] 1 Array(Int64) Array(Int64) +42 [1,2,3] 1 Int64 Array(Int64) +42 42 0 Int64 Int64 +42 43 0 Int64 Int64 +42 abc 0 Int64 String +42 \N 0 Int64 None +43 42 1 Int64 Int64 +abc [1,2,3] 1 String Array(Int64) +abc 42 1 String Int64 +abc abc 0 String String +abc abd 0 String String +abc \N 0 String None +abd abc 1 String String +\N [1,2,3] 1 None Array(Int64) +\N 42 1 None Int64 +\N abc 1 None String +\N \N 0 None None +d1 >= d2 +[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 1 Array(Int64) Array(Int64) +[1,2,3] 42 1 Array(Int64) Int64 +[1,2,3] abc 1 Array(Int64) String +[1,2,3] \N 1 Array(Int64) None +[1,2,4] [1,2,3] 1 Array(Int64) Array(Int64) +42 [1,2,3] 1 Int64 Array(Int64) +42 42 1 Int64 Int64 +42 43 1 Int64 Int64 +42 abc 1 Int64 String +42 \N 1 Int64 None +43 42 1 Int64 Int64 +abc [1,2,3] 1 String Array(Int64) +abc 42 1 String Int64 +abc abc 1 String String +abc abd 1 String String +abc \N 1 String None +abd abc 1 String String +\N [1,2,3] 1 None Array(Int64) +\N 42 1 None Int64 +\N abc 1 None String +\N \N 1 None None diff --git a/tests/queries/0_stateless/03035_dynamic_sorting.sql b/tests/queries/0_stateless/03035_dynamic_sorting.sql new file mode 100644 index 00000000000..0487fafc955 --- /dev/null +++ b/tests/queries/0_stateless/03035_dynamic_sorting.sql @@ -0,0 +1,80 @@ +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (d1 Dynamic, d2 Dynamic) engine=Memory; + +insert into test values (42, 42); +insert into test values (42, 43); +insert into test values (43, 42); + +insert into test values ('abc', 
'abc'); +insert into test values ('abc', 'abd'); +insert into test values ('abd', 'abc'); + +insert into test values ([1,2,3], [1,2,3]); +insert into test values ([1,2,3], [1,2,4]); +insert into test values ([1,2,4], [1,2,3]); + +insert into test values (NULL, NULL); + +insert into test values (42, 'abc'); +insert into test values ('abc', 42); + +insert into test values (42, [1,2,3]); +insert into test values ([1,2,3], 42); + +insert into test values (42, NULL); +insert into test values (NULL, 42); + +insert into test values ('abc', [1,2,3]); +insert into test values ([1,2,3], 'abc'); + +insert into test values ('abc', NULL); +insert into test values (NULL, 'abc'); + +insert into test values ([1,2,3], NULL); +insert into test values (NULL, [1,2,3]); + + +select 'order by d1 nulls first'; +select d1, dynamicType(d1) from test order by d1 nulls first; + +select 'order by d1 nulls last'; +select d1, dynamicType(d1) from test order by d1 nulls last; + +select 'order by d2 nulls first'; +select d2, dynamicType(d2) from test order by d2 nulls first; + +select 'order by d2 nulls last'; +select d2, dynamicType(d2) from test order by d2 nulls last; + + +select 'order by d1, d2 nulls first'; +select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2 nulls first; + +select 'order by d1, d2 nulls last'; +select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2 nulls last; + +select 'order by d2, d1 nulls first'; +select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d2, d1 nulls first; + +select 'order by d2, d1 nulls last'; +select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d2, d1 nulls last; + +select 'd1 = d2'; +select d1, d2, d1 = d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +select 'd1 < d2'; +select d1, d2, d1 < d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +select 'd1 <= d2'; +select d1, d2, d1 <= d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +select 'd1 > d2'; +select d1, d2, d1 > d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +select 'd1 >= d2'; +select d1, d2, d2 >= d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +drop table test; + diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference new file mode 100644 index 00000000000..36984bc8b9b --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference @@ -0,0 +1,57 @@ +Memory +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 +MergeTree compact +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 +MergeTree wide +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh new file mode 100755 index 00000000000..65517061b99 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000" + + $CH_CLIENT -q "select distinct dynamicType(d) as type from test order by type" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'UInt64'" + $CH_CLIENT -q "select count() from test where d.UInt64 is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'String'" + $CH_CLIENT -q "select count() from test where d.String is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Date'" + $CH_CLIENT -q "select count() from test where d.Date is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Variant(String, UInt64))\`)" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Array(Dynamic))\`)" + $CH_CLIENT -q "select count() from test where d is NULL" + $CH_CLIENT -q "select count() from test where not empty(d.\`Tuple(a Array(Dynamic))\`.a.String)" + + $CH_CLIENT -q "select d, d.UInt64, d.String, d.\`Array(Variant(String, UInt64))\` from test format Null" + $CH_CLIENT -q "select d.UInt64, d.String, d.\`Array(Variant(String, UInt64))\` from test format Null" + $CH_CLIENT -q "select d.Int8, d.Date, d.\`Array(String)\` from test format Null" + $CH_CLIENT -q "select d, d.UInt64, d.Date, d.\`Array(Variant(String, UInt64))\`, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null" + $CH_CLIENT -q "select d.UInt64, d.Date, d.\`Array(Variant(String, UInt64))\`, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64, d.\`Array(Variant(String, UInt64))\`.String from test format Null" + $CH_CLIENT -q "select d, d.\`Tuple(a UInt64, b String)\`.a, d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null" + $CH_CLIENT -q "select d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64, d.\`Array(Dynamic)\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null" + $CH_CLIENT -q "select 
d.\`Array(Array(Dynamic))\`.size1, d.\`Array(Array(Dynamic))\`.UInt64, d.\`Array(Array(Dynamic))\`.\`Map(String, Tuple(a UInt64))\`.values.a from test format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference new file mode 100644 index 00000000000..59297e46330 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference @@ -0,0 +1,60 @@ +MergeTree compact +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String +MergeTree wide +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh new file mode 100755 index 00000000000..7c1ac41cfdc --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --merge_max_block_size 8192 --merge_max_block_size_bytes=10485760 --index_granularity 8192" + +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)" + $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)" + $CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)" + $CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)" + $CH_CLIENT -q "insert into test select number, NULL from numbers(100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10;" +test +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference new file mode 100644 index 00000000000..59297e46330 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference @@ -0,0 +1,60 @@ +MergeTree compact +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String +MergeTree wide +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 
String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh new file mode 100755 index 00000000000..927ceac72b5 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --merge_max_block_size 8192 --merge_max_block_size_bytes=10485760 --index_granularity 8192" +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)" + $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)" + $CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)" + $CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)" + $CH_CLIENT -q "insert into test select number, NULL from numbers(100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, 
vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2.reference b/tests/queries/0_stateless/03037_dynamic_merges_2.reference new file mode 100644 index 00000000000..420b8185b16 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 +MergeTree wide + horizontal merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 +MergeTree compact + vertical merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 +MergeTree wide + vertical merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2.sh b/tests/queries/0_stateless/03037_dynamic_merges_2.sh new file mode 100755 index 00000000000..40adbdd4262 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(1000000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000)" + $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000)" + + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference new file mode 100644 index 00000000000..65034647775 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference @@ -0,0 +1,92 @@ +MergeTree compact + horizontal merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a 
Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None +MergeTree wide + horizontal merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None +MergeTree compact + vertical merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None +MergeTree wide + vertical merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh 
b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh new file mode 100755 index 00000000000..b82ddb3813e --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" + $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + + $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" + + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.reference 
b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.reference new file mode 100644 index 00000000000..3c186fcc935 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.reference @@ -0,0 +1,32 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 diff --git a/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh new file mode 100755 index 00000000000..b8760ec0e1d --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128 --optimize_aggregation_in_order 0" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sum AggregateFunction(sum, UInt64), d Dynamic) engine=AggregatingMergeTree() order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), number from numbers(100000) group by number" + $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), 'str_' || toString(number) from numbers(50000, 100000) group by number" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, 
vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.reference new file mode 100644 index 00000000000..fc293cc2ec8 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 diff --git a/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh new file mode 100755 index 00000000000..881c9ec64cc --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sign Int8, d Dynamic) engine=CollapsingMergeTree(sign) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, -1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.reference new file mode 100644 index 00000000000..132b9df6b26 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String +MergeTree compact + vertical merge +100000 String +100000 UInt64 +50000 UInt64 
+100000 String +MergeTree wide + vertical merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String diff --git a/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh new file mode 100755 index 00000000000..fc9039ac98c --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=ReplacingMergeTree order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.reference new file mode 100644 index 00000000000..3c186fcc935 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.reference @@ -0,0 +1,32 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 diff --git a/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh new file mode 100755 index 00000000000..f9da70e95ca --- /dev/null +++ 
b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sum UInt64, d Dynamic) engine=SummingMergeTree(sum) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.reference new file mode 100644 index 00000000000..cabb0fdefab --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 diff --git a/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh new file mode 100755 index 00000000000..ca313307a6d --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# 
reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sign Int8, version UInt8, d Dynamic) engine=VersionedCollapsingMergeTree(sign, version) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, -1, number >= 75000 ? 2 : 1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference new file mode 100644 index 00000000000..ca98ec0963c --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference @@ -0,0 +1,526 @@ +Memory +initial insert +alter add column 1 +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +alter modify 
column 2 +4 UInt64 +7 String +8 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +insert after alter modify column 2 +1 Date +5 UInt64 +8 String +9 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +19 19 \N \N \N \N \N +20 20 20 \N 20 \N \N +21 21 str_21 str_21 \N \N \N +22 22 1970-01-23 \N \N 1970-01-23 \N +alter modify column 3 +1 Date +5 UInt64 +8 String +9 None +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N \N 3 \N \N +4 4 4 \N \N \N 4 \N \N +5 5 5 \N \N \N 5 \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N \N 12 \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +insert after alter modify column 3 +1 Date +5 UInt64 +8 String +12 None +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N \N 3 \N \N +4 4 4 \N \N \N 4 \N \N +5 5 5 \N \N \N 5 \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N \N 12 \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +23 \N \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N \N +25 str_25 \N str_25 \N \N \N \N \N +MergeTree compact +initial insert +alter add column 1 +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert after alter modify column 1 +8 None +11 
String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +alter modify column 2 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +insert after alter modify column 2 +1 Date +1 UInt64 +9 None +12 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +19 19 \N \N \N \N \N +20 20 20 \N 20 \N \N +21 21 str_21 str_21 \N \N \N +22 22 1970-01-23 \N \N 1970-01-23 \N +alter modify column 3 +1 Date +1 UInt64 +9 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +insert after alter modify column 3 +1 Date +1 UInt64 +12 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +23 \N \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N \N +25 str_25 \N str_25 \N \N \N \N \N +MergeTree wide +initial insert +alter add column 1 +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 
\N \N \N +14 14 \N \N \N \N \N +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +alter modify column 2 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +insert after alter modify column 2 +1 Date +1 UInt64 +9 None +12 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +19 19 \N \N \N \N \N +20 20 20 \N 20 \N \N +21 21 str_21 str_21 \N \N \N +22 22 1970-01-23 \N \N 1970-01-23 \N +alter modify column 3 +1 Date +1 UInt64 +9 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +insert after alter modify column 3 +1 Date +1 UInt64 +12 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +23 \N \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N \N +25 str_25 \N str_25 \N \N \N \N \N diff --git 
a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh new file mode 100755 index 00000000000..7a73be20a4d --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_analyzer=1" + +function run() +{ + echo "initial insert" + $CH_CLIENT -q "insert into test select number, number from numbers(3)" + + echo "alter add column 1" + $CH_CLIENT -q "alter table test add column d Dynamic(max_types=3) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter add column 1" + $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" + $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" + $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter modify column 1" + $CH_CLIENT -q "alter table test modify column d Dynamic(max_types=1) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter modify column 1" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(15, 4)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter modify column 2" + $CH_CLIENT -q "alter table test modify column d Dynamic(max_types=3) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter modify column 2" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(19, 4)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter modify column 3" + $CH_CLIENT -q "alter table test modify column y Dynamic settings 
mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, y.UInt64, y.String, y.\`Tuple(a UInt64)\`.a, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter modify column 3" + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(23, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, y.UInt64, y.String, y.\`Tuple(a UInt64)\`.a, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=Memory" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference new file mode 100644 index 00000000000..18a181464e9 --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference @@ -0,0 +1,182 @@ +MergeTree compact +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter rename column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert nested dynamic +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] +alter rename column 2 +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N 
\N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] +MergeTree wide +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter rename column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert nested dynamic +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] +alter rename column 2 +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh new file mode 100755 index 00000000000..6491e64372f --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" + +function run() +{ + echo "initial insert" + $CH_CLIENT -q "insert into test select number, number from numbers(3)" + + echo "alter add column" + $CH_CLIENT -q "alter table test add column d Dynamic settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter add column 1" + $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" + $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" + $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter rename column 1" + $CH_CLIENT -q "alter table test rename column d to d1 settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1)" + $CH_CLIENT -q "select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert nested dynamic" + $CH_CLIENT -q "insert into test select number, number, [number % 2 ? number : 'str_' || toString(number)]::Array(Dynamic) from numbers(15, 3)" + $CH_CLIENT -q "select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1)" + $CH_CLIENT -q "select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.\`Tuple(a UInt64)\`.a, d1.\`Array(Dynamic)\`.UInt64, d1.\`Array(Dynamic)\`.String, d1.\`Array(Dynamic)\`.Date from test order by x" + + echo "alter rename column 2" + $CH_CLIENT -q "alter table test rename column d1 to d2 settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d2) from test group by dynamicType(d2) order by count(), dynamicType(d2)" + $CH_CLIENT -q "select x, y, d2, d2.String, d2.UInt64, d2.Date, d2.\`Tuple(a UInt64)\`.a, d2.\`Array(Dynamic)\`.UInt64, d2.\`Array(Dynamic)\`.String, d2.\`Array(Dynamic)\`.Date, from test order by x" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.reference b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference new file mode 100644 index 00000000000..b1ea186a917 --- /dev/null +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference @@ -0,0 +1,56 @@ +MergeTree compact +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column +4 String +4 UInt64 +7 
None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +check table +1 +MergeTree wide +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +check table +1 diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.sh b/tests/queries/0_stateless/03041_dynamic_type_check_table.sh new file mode 100755 index 00000000000..3d802485be3 --- /dev/null +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" + +function run() +{ + echo "initial insert" + $CH_CLIENT -q "insert into test select number, number from numbers(3)" + + echo "alter add column" + $CH_CLIENT -q "alter table test add column d Dynamic(max_types=3) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter add column" + $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" + $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" + $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "check table" + $CH_CLIENT -q "check table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.reference b/tests/queries/0_stateless/03130_generateSnowflakeId.reference new file mode 100644 index 00000000000..f5b7872f81e --- /dev/null +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.reference @@ -0,0 +1,9 @@ +-- generateSnowflakeID +1 +0 +0 +1 +100 +-- 
generateSnowflakeIDThreadMonotonic +1 +100 diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.sql b/tests/queries/0_stateless/03130_generateSnowflakeId.sql new file mode 100644 index 00000000000..57cdd21a9fe --- /dev/null +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.sql @@ -0,0 +1,29 @@ +SELECT '-- generateSnowflakeID'; + +SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; -- check first bit is zero + +SELECT generateSnowflakeID(1) = generateSnowflakeID(2); -- disabled common subexpression elimination --> lhs != rhs +SELECT generateSnowflakeID() = generateSnowflakeID(1); -- same as ^^ +SELECT generateSnowflakeID(1) = generateSnowflakeID(1); -- enabled common subexpression elimination + +SELECT generateSnowflakeID(1, 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT count(*) +FROM +( + SELECT DISTINCT generateSnowflakeID() + FROM numbers(100) +); + +SELECT '-- generateSnowflakeIDThreadMonotonic'; + +SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeIDThreadMonotonic()), 63), 1) = 0; -- check first bit is zero + +SELECT generateSnowflakeIDThreadMonotonic(1, 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT count(*) +FROM +( + SELECT DISTINCT generateSnowflakeIDThreadMonotonic() + FROM numbers(100) +); diff --git a/tests/queries/0_stateless/03144_compress_stdout.reference b/tests/queries/0_stateless/03144_compress_stdout.reference new file mode 100644 index 00000000000..6f51dfc24e1 --- /dev/null +++ b/tests/queries/0_stateless/03144_compress_stdout.reference @@ -0,0 +1,2 @@ +Hello, World! From client. +Hello, World! From local. diff --git a/tests/queries/0_stateless/03144_compress_stdout.sh b/tests/queries/0_stateless/03144_compress_stdout.sh new file mode 100755 index 00000000000..569754303a7 --- /dev/null +++ b/tests/queries/0_stateless/03144_compress_stdout.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +[ -e "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_client.gz ] && rm "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_client.gz + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM (SELECT 'Hello, World! From client.')" > ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client.gz +gunzip ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client.gz +cat ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client + +rm -f "${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client" + +[ -e "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_local.gz ] && rm "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_local.gz + +${CLICKHOUSE_LOCAL} --query "SELECT * FROM (SELECT 'Hello, World! 
From local.')" > ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local.gz +gunzip ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local.gz +cat ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local + +rm -f "${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local" diff --git a/tests/queries/0_stateless/03147_system_columns_access_checks.sh b/tests/queries/0_stateless/03147_system_columns_access_checks.sh index 2bd7fb083ea..b027ea28504 100755 --- a/tests/queries/0_stateless/03147_system_columns_access_checks.sh +++ b/tests/queries/0_stateless/03147_system_columns_access_checks.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-ordinary-database, long +# Tags: no-fasttest, no-parallel, no-ordinary-database, long, no-debug, no-asan, no-tsan, no-msan CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference new file mode 100644 index 00000000000..0b76d30953e --- /dev/null +++ b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference @@ -0,0 +1,35 @@ +1 2024-01-01 Date +2 1704056400 Decimal(18, 3) +3 1 String +4 2 String + +1 2024-01-01 Date +1 2024-01-01 Date +2 1704056400 Decimal(18, 3) +2 1704056400 Decimal(18, 3) +3 1 String +3 1 String +4 2 String +4 2 String + +1 2024-01-01 String +1 2024-01-01 String +2 1704056400 String +2 1704056400 String +3 1 String +3 1 String +4 2 String +4 2 String + +1 2024-01-01 Date +1 2024-01-01 String +1 2024-01-01 String +2 1704056400 Decimal(18, 3) +2 1704056400 String +2 1704056400 String +3 1 String +3 1 String +3 1 String +4 2 String +4 2 String +4 2 String diff --git a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql new file mode 100644 index 00000000000..ad5ea9512c6 --- /dev/null +++ b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql @@ -0,0 +1,34 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE null_table +( + n1 UInt8, + n2 Dynamic(max_types=3) +) +ENGINE = Null; + +CREATE MATERIALIZED VIEW dummy_rmv TO to_table +AS SELECT * FROM null_table; + +CREATE TABLE to_table +( + n1 UInt8, + n2 Dynamic(max_types=4) +) +ENGINE = MergeTree ORDER BY n1; + +INSERT INTO null_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +INSERT INTO null_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=1); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=10); +INSERT INTO null_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; diff --git a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference new file mode 100644 index 00000000000..d96fbf658d8 --- /dev/null +++ b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference @@ -0,0 +1,26 @@ +1 2024-01-01 Date +2 1704056400 String +3 1 
String +4 2 String + +1 2024-01-01 Date +1 2024-01-01 Date +2 1704056400 Decimal(18, 3) +2 1704056400 String +3 1 Float32 +3 1 String +4 2 Float64 +4 2 String + +1 2024-01-01 String +1 2024-01-01 String +1 2024-01-01 String +2 1704056400 String +2 1704056400 String +2 1704056400 String +3 1 String +3 1 String +3 1 String +4 2 String +4 2 String +4 2 String diff --git a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql new file mode 100644 index 00000000000..632f3504fdb --- /dev/null +++ b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql @@ -0,0 +1,26 @@ +SET allow_experimental_dynamic_type=1; +set min_compress_block_size = 585572, max_compress_block_size = 373374, max_block_size = 60768, max_joined_block_size_rows = 18966, max_insert_threads = 5, max_threads = 50, max_read_buffer_size = 708232, connect_timeout_with_failover_ms = 2000, connect_timeout_with_failover_secure_ms = 3000, idle_connection_timeout = 36000, use_uncompressed_cache = true, stream_like_engine_allow_direct_select = true, replication_wait_for_inactive_replica_timeout = 30, compile_aggregate_expressions = false, min_count_to_compile_aggregate_expression = 0, compile_sort_description = false, group_by_two_level_threshold = 1000000, group_by_two_level_threshold_bytes = 12610083, enable_memory_bound_merging_of_aggregation_results = false, min_chunk_bytes_for_parallel_parsing = 18769830, merge_tree_coarse_index_granularity = 12, min_bytes_to_use_direct_io = 10737418240, min_bytes_to_use_mmap_io = 10737418240, log_queries = true, insert_quorum_timeout = 60000, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.05000000074505806, http_response_buffer_size = 294986, fsync_metadata = true, http_send_timeout = 60., http_receive_timeout = 60., opentelemetry_start_trace_probability = 0.10000000149011612, max_bytes_before_external_group_by = 1, max_bytes_before_external_sort = 10737418240, max_bytes_before_remerge_sort = 1326536545, max_untracked_memory = 1048576, memory_profiler_step = 1048576, log_comment = '03151_dynamic_type_scale_max_types.sql', send_logs_level = 'fatal', prefer_localhost_replica = false, optimize_read_in_order = false, optimize_aggregation_in_order = true, aggregation_in_order_max_block_bytes = 27069500, read_in_order_two_level_merge_threshold = 75, allow_introspection_functions = true, database_atomic_wait_for_drop_and_detach_synchronously = true, remote_filesystem_read_method = 'read', local_filesystem_read_prefetch = true, remote_filesystem_read_prefetch = false, merge_tree_compact_parts_min_granules_to_multibuffer_read = 119, async_insert_busy_timeout_max_ms = 5000, read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true, filesystem_cache_segments_batch_size = 10, use_page_cache_for_disks_without_file_cache = true, page_cache_inject_eviction = true, allow_prefetched_read_pool_for_remote_filesystem = false, filesystem_prefetch_step_marks = 50, filesystem_prefetch_min_bytes_for_single_read_task = 16777216, filesystem_prefetch_max_memory_usage = 134217728, filesystem_prefetches_limit = 10, optimize_sorting_by_input_stream_properties = false, allow_experimental_dynamic_type = true, session_timezone = 'Africa/Khartoum', prefer_warmed_unmerged_parts_seconds = 2; + +drop table if exists to_table; + +CREATE TABLE to_table +( + n1 UInt8, + n2 Dynamic(max_types=2) +) +ENGINE = MergeTree ORDER BY n1; + +INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, 
toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=5); +INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=1); +INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=500); -- { serverError UNEXPECTED_AST_STRUCTURE } diff --git a/tests/queries/0_stateless/03152_analyzer_columns_list.reference b/tests/queries/0_stateless/03152_analyzer_columns_list.reference new file mode 100644 index 00000000000..4e9025b5baf --- /dev/null +++ b/tests/queries/0_stateless/03152_analyzer_columns_list.reference @@ -0,0 +1 @@ +4 3 diff --git a/tests/queries/0_stateless/03152_analyzer_columns_list.sql b/tests/queries/0_stateless/03152_analyzer_columns_list.sql new file mode 100644 index 00000000000..baed3a4ff68 --- /dev/null +++ b/tests/queries/0_stateless/03152_analyzer_columns_list.sql @@ -0,0 +1,13 @@ +CREATE TABLE test +( + foo String, + bar String, +) +ENGINE = MergeTree() +ORDER BY (foo, bar); + +INSERT INTO test VALUES ('foo', 'bar1'); + +SELECT COLUMNS(bar, foo) APPLY (length) FROM test; + +SELECT COLUMNS(bar, foo, xyz) APPLY (length) FROM test; -- { serverError UNKNOWN_IDENTIFIER } diff --git a/tests/queries/0_stateless/03152_dynamic_type_simple.reference b/tests/queries/0_stateless/03152_dynamic_type_simple.reference new file mode 100644 index 00000000000..5f243209ff3 --- /dev/null +++ b/tests/queries/0_stateless/03152_dynamic_type_simple.reference @@ -0,0 +1,25 @@ +string1 String +42 Int64 +3.14 Float64 +[1,2] Array(Int64) +2021-01-01 Date +string2 String + +\N None 42 Int64 +42 Int64 string String +string String [1, 2] String +[1,2] Array(Int64) \N None + ┌─d────────────────────────┬─dynamicType(d)─┬─d.Int64─┬─d.String─┬─────d.Date─┬─d.Float64─┬──────────d.DateTime─┬─d.Array(Int64)─┬─d.Array(String)──────────┐ + 1. │ 42 │ Int64 │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 2. │ string1 │ String │ ᴺᵁᴸᴸ │ string1 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 3. │ 2021-01-01 │ Date │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2021-01-01 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 4. │ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ [] │ + 5. │ 3.14 │ Float64 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 3.14 │ ᴺᵁᴸᴸ │ [] │ [] │ + 6. │ string2 │ String │ ᴺᵁᴸᴸ │ string2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 7. │ 2021-01-01 12:00:00 │ DateTime │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2021-01-01 12:00:00 │ [] │ [] │ + 8. │ ['array','of','strings'] │ Array(String) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ ['array','of','strings'] │ + 9. │ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ +10. 
│ 42.42 │ Float64 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 42.42 │ ᴺᵁᴸᴸ │ [] │ [] │ + └──────────────────────────┴────────────────┴─────────┴──────────┴────────────┴───────────┴─────────────────────┴────────────────┴──────────────────────────┘ + +49995000 diff --git a/tests/queries/0_stateless/03152_dynamic_type_simple.sql b/tests/queries/0_stateless/03152_dynamic_type_simple.sql new file mode 100644 index 00000000000..fd5328faf15 --- /dev/null +++ b/tests/queries/0_stateless/03152_dynamic_type_simple.sql @@ -0,0 +1,29 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE test_max_types (d Dynamic(max_types=5)) ENGINE = Memory; +INSERT INTO test_max_types VALUES ('string1'), (42), (3.14), ([1, 2]), (toDate('2021-01-01')), ('string2'); +SELECT d, dynamicType(d) FROM test_max_types; + +SELECT ''; +CREATE TABLE test_nested_dynamic (d1 Dynamic, d2 Dynamic(max_types=2)) ENGINE = Memory; +INSERT INTO test_nested_dynamic VALUES (NULL, 42), (42, 'string'), ('string', [1, 2]), ([1, 2], NULL); +SELECT d1, dynamicType(d1), d2, dynamicType(d2) FROM test_nested_dynamic; + +CREATE TABLE test_rapid_schema (d Dynamic) ENGINE = Memory; +INSERT INTO test_rapid_schema VALUES (42), ('string1'), (toDate('2021-01-01')), ([1, 2, 3]), (3.14), ('string2'), (toDateTime('2021-01-01 12:00:00')), (['array', 'of', 'strings']), (NULL), (toFloat64(42.42)); + +SELECT d, dynamicType(d), d.Int64, d.String, d.Date, d.Float64, d.DateTime, d.`Array(Int64)`, d.`Array(String)` +FROM test_rapid_schema FORMAT PrettyCompactMonoBlock; + + +SELECT ''; +SELECT finalizeAggregation(CAST(dynamic_state, 'AggregateFunction(sum, UInt64)')) +FROM +( + SELECT CAST(state, 'Dynamic') AS dynamic_state + FROM + ( + SELECT sumState(number) AS state + FROM numbers(10000) + ) +); diff --git a/tests/queries/0_stateless/03153_dynamic_type_empty.reference b/tests/queries/0_stateless/03153_dynamic_type_empty.reference new file mode 100644 index 00000000000..f7c047dcd19 --- /dev/null +++ b/tests/queries/0_stateless/03153_dynamic_type_empty.reference @@ -0,0 +1,15 @@ +[] String +[1] Array(Int64) +[] Array(Int64) +['1'] Array(String) +[] Array(Int64) +() String +(1) Tuple(Int64) +(0) Tuple(Int64) +('1') Tuple(String) +(0) Tuple(Int64) +{} String +{1:2} Map(Int64, Int64) +{} Map(Int64, Int64) +{'1':'2'} Map(String, String) +{} Map(Int64, Int64) diff --git a/tests/queries/0_stateless/03153_dynamic_type_empty.sql b/tests/queries/0_stateless/03153_dynamic_type_empty.sql new file mode 100644 index 00000000000..8e942fe6f6e --- /dev/null +++ b/tests/queries/0_stateless/03153_dynamic_type_empty.sql @@ -0,0 +1,5 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE test_null_empty (d Dynamic) ENGINE = Memory; +INSERT INTO test_null_empty VALUES ([]), ([1]), ([]), (['1']), ([]), (()),((1)), (()), (('1')), (()), ({}), ({1:2}), ({}), ({'1':'2'}), ({}); +SELECT d, dynamicType(d) FROM test_null_empty; diff --git a/tests/queries/0_stateless/03155_analyzer_interpolate.reference b/tests/queries/0_stateless/03155_analyzer_interpolate.reference new file mode 100644 index 00000000000..791aaa5b2a2 --- /dev/null +++ b/tests/queries/0_stateless/03155_analyzer_interpolate.reference @@ -0,0 +1,13 @@ +0 [5] +0.5 [5] +1 [1] +1.5 [5] +2 [5] +2.5 [5] +3 [5] +3.5 [5] +4 [4] +4.5 [5] +5 [5] +5.5 [5] +7 [7] diff --git a/tests/queries/0_stateless/03155_analyzer_interpolate.sql b/tests/queries/0_stateless/03155_analyzer_interpolate.sql new file mode 100644 index 00000000000..b3c1d233f47 --- /dev/null +++ b/tests/queries/0_stateless/03155_analyzer_interpolate.sql @@ -0,0 +1,12 @@ +-- 
https://github.com/ClickHouse/ClickHouse/issues/62464 +SET allow_experimental_analyzer = 1; + +SELECT n, [number] AS inter FROM ( + SELECT toFloat32(number % 10) AS n, number + FROM numbers(10) WHERE number % 3 = 1 +) GROUP BY n, inter ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS [5]); + +SELECT n, number+5 AS inter FROM ( -- { serverError NOT_AN_AGGREGATE } + SELECT toFloat32(number % 10) AS n, number, number*2 AS mn + FROM numbers(10) WHERE number % 3 = 1 +) GROUP BY n, inter ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS mn * 2); diff --git a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference new file mode 100644 index 00000000000..b5b2aec9c12 --- /dev/null +++ b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference @@ -0,0 +1,12 @@ +Hello [1,2] 1 +Hello [1,2] 2 +Hello [1,2] 1 +Hello [1,2] 1 +Hello [1,2] 2 +Hello [1,2] 2 +Hello 1 +Hello 2 +Hello 1 +Hello 1 +Hello 2 +Hello 2 diff --git a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql new file mode 100644 index 00000000000..f605a369822 --- /dev/null +++ b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql @@ -0,0 +1,10 @@ +CREATE TABLE arrays_test (s String, arr Array(UInt8)) ENGINE = MergeTree() ORDER BY (s); + +INSERT INTO arrays_test VALUES ('Hello', [1,2]), ('World', [3,4,5]), ('Goodbye', []); + +SELECT s, arr, a FROM remote('127.0.0.2', currentDatabase(), arrays_test) ARRAY JOIN arr AS a WHERE a < 3 ORDER BY a; +SELECT s, arr, a FROM remote('127.0.0.{1,2}', currentDatabase(), arrays_test) ARRAY JOIN arr AS a WHERE a < 3 ORDER BY a; + + +SELECT s, arr FROM remote('127.0.0.2', currentDatabase(), arrays_test) ARRAY JOIN arr WHERE arr < 3 ORDER BY arr; +SELECT s, arr FROM remote('127.0.0.{1,2}', currentDatabase(), arrays_test) ARRAY JOIN arr WHERE arr < 3 ORDER BY arr; diff --git a/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.reference b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.reference new file mode 100644 index 00000000000..e1c7b69b136 --- /dev/null +++ b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.reference @@ -0,0 +1,7 @@ +Array(UInt64) 12000 10000 +Date 12000 10001 +Float64 12000 10000 +Int64 10000 10000 +Map(UInt64, String) 10000 10000 +String 10000 10000 +UInt64 4000 4000 diff --git a/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.sh b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.sh new file mode 100755 index 00000000000..d7709b722c9 --- /dev/null +++ b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "CREATE TABLE test_cc (d Dynamic) ENGINE = Memory" + + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT number::Int64 AS d FROM numbers(10000) SETTINGS max_threads=1,max_insert_threads=1" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT toString(number) AS d FROM numbers(10000) SETTINGS max_threads=2,max_insert_threads=2" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT toDate(number % 10000) AS d FROM numbers(10000) SETTINGS max_threads=3,max_insert_threads=3" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT [number, number + 1] AS d FROM numbers(10000) SETTINGS max_threads=4,max_insert_threads=4" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT toFloat64(number) AS d FROM numbers(10000) SETTINGS max_threads=5,max_insert_threads=5" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT map(number, toString(number)) AS d FROM numbers(10000) SETTINGS max_threads=6,max_insert_threads=6" & + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --use_variant_as_common_type=1 --allow_experimental_variant_type=1 -q "INSERT INTO test_cc SELECT CAST(multiIf(number % 5 = 0, toString(number), number % 5 = 1, number, number % 5 = 2, toFloat64(number), number % 5 = 3, toDate('2020-01-01'), [number, number + 1]), 'Dynamic') FROM numbers(10000) SETTINGS max_threads=6,max_insert_threads=6" & + +wait + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "SELECT dynamicType(d) t, count(), uniqExact(d) FROM test_cc GROUP BY t ORDER BY t" diff --git a/tests/queries/0_stateless/03156_tuple_map_low_cardinality.reference b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.reference new file mode 100644 index 00000000000..5b2a36927ee --- /dev/null +++ b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.reference @@ -0,0 +1,6 @@ +100000 +100000 +100000 +100000 +100000 +100000 diff --git a/tests/queries/0_stateless/03156_tuple_map_low_cardinality.sql b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.sql new file mode 100644 index 00000000000..836b426a9a9 --- /dev/null +++ b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS t_map_lc; + +CREATE TABLE t_map_lc +( + id UInt64, + t Tuple(m Map(LowCardinality(String), LowCardinality(String))) +) +ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO t_map_lc SELECT * FROM generateRandom('id UInt64, t Tuple(m Map(LowCardinality(String), LowCardinality(String)))') LIMIT 100000; + +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, mapKeys(t.m)); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.keys); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.values); +SELECT * FROM t_map_lc WHERE mapContains(t.m, 'not_existing_key_1337'); + +DROP TABLE t_map_lc; + +CREATE TABLE t_map_lc +( + id UInt64, + t Tuple(m Map(LowCardinality(String), LowCardinality(String))) +) +ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = '10G'; + +INSERT INTO t_map_lc SELECT * FROM generateRandom('id UInt64, t Tuple(m Map(LowCardinality(String), LowCardinality(String)))') LIMIT 100000; + +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, mapKeys(t.m)); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.keys); +SELECT count(), FROM t_map_lc 
WHERE NOT ignore(*, t.m.values); +SELECT * FROM t_map_lc WHERE mapContains(t.m, 'not_existing_key_1337'); + +DROP TABLE t_map_lc; diff --git a/tests/queries/0_stateless/03157_dynamic_type_json.reference b/tests/queries/0_stateless/03157_dynamic_type_json.reference new file mode 100644 index 00000000000..38bca12bb95 --- /dev/null +++ b/tests/queries/0_stateless/03157_dynamic_type_json.reference @@ -0,0 +1,5 @@ +1 (((((((((('deep_value')))))))))) +2 (((((((((('deep_array_value')))))))))) + +(((((((((('deep_value')))))))))) Tuple(level1 Tuple(level2 Tuple(level3 Tuple(level4 Tuple(level5 Tuple(level6 Tuple(level7 Tuple(level8 Tuple(level9 Tuple(level10 String)))))))))) +(((((((((('deep_array_value')))))))))) Tuple(level1 Tuple(level2 Tuple(level3 Tuple(level4 Tuple(level5 Tuple(level6 Tuple(level7 Tuple(level8 Tuple(level9 Tuple(level10 String)))))))))) diff --git a/tests/queries/0_stateless/03157_dynamic_type_json.sql b/tests/queries/0_stateless/03157_dynamic_type_json.sql new file mode 100644 index 00000000000..cb1a5987104 --- /dev/null +++ b/tests/queries/0_stateless/03157_dynamic_type_json.sql @@ -0,0 +1,13 @@ +SET allow_experimental_dynamic_type=1; +SET allow_experimental_object_type=1; +SET allow_experimental_variant_type=1; + +CREATE TABLE test_deep_nested_json (i UInt16, d JSON) ENGINE = Memory; + +INSERT INTO test_deep_nested_json VALUES (1, '{"level1": {"level2": {"level3": {"level4": {"level5": {"level6": {"level7": {"level8": {"level9": {"level10": "deep_value"}}}}}}}}}}'); +INSERT INTO test_deep_nested_json VALUES (2, '{"level1": {"level2": {"level3": {"level4": {"level5": {"level6": {"level7": {"level8": {"level9": {"level10": "deep_array_value"}}}}}}}}}}'); + +SELECT * FROM test_deep_nested_json ORDER BY i; + +SELECT ''; +SELECT d::Dynamic d1, dynamicType(d1) FROM test_deep_nested_json ORDER BY i; diff --git a/tests/queries/0_stateless/03158_dynamic_type_from_variant.reference b/tests/queries/0_stateless/03158_dynamic_type_from_variant.reference new file mode 100644 index 00000000000..2ede006cedc --- /dev/null +++ b/tests/queries/0_stateless/03158_dynamic_type_from_variant.reference @@ -0,0 +1,17 @@ +false Variant(Bool, DateTime64(3), IPv6, String, UInt32) +false Variant(Bool, DateTime64(3), IPv6, String, UInt32) +true Variant(Bool, DateTime64(3), IPv6, String, UInt32) +2001-01-01 01:01:01.111 Variant(Bool, DateTime64(3), IPv6, String, UInt32) +s Variant(Bool, DateTime64(3), IPv6, String, UInt32) +0 Variant(Bool, DateTime64(3), IPv6, String, UInt32) +1 Variant(Bool, DateTime64(3), IPv6, String, UInt32) +\N Variant(Bool, DateTime64(3), IPv6, String, UInt32) + +false Bool +false Bool +true Bool +2001-01-01 01:01:01.111 DateTime64(3) +s String +0 UInt32 +1 UInt32 +\N None diff --git a/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql b/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql new file mode 100644 index 00000000000..20a9e17a148 --- /dev/null +++ b/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql @@ -0,0 +1,15 @@ +SET allow_experimental_dynamic_type=1; +SET allow_experimental_object_type=1; +SET allow_experimental_variant_type=1; + +CREATE TABLE test_variable (v Variant(String, UInt32, IPv6, Bool, DateTime64)) ENGINE = Memory; +CREATE TABLE test_dynamic (d Dynamic) ENGINE = Memory; + +INSERT INTO test_variable VALUES (1), ('s'), (0), ('0'), ('true'), ('false'), ('2001-01-01 01:01:01.111'), (NULL); + +SELECT v, toTypeName(v) FROM test_variable ORDER BY v; + +INSERT INTO test_dynamic SELECT * FROM test_variable; + +SELECT ''; +SELECT d, 
dynamicType(d) FROM test_dynamic ORDER BY d; diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference new file mode 100644 index 00000000000..72c5b90dbba --- /dev/null +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference @@ -0,0 +1,292 @@ +Array(Dynamic) [] +Array(Array(Dynamic)) [[]] +Array(Array(Array(Dynamic))) [[[]]] +Bool false +Bool true +Date 2022-01-01 +Date32 2022-01-01 +DateTime 2022-01-01 01:01:01 +DateTime64(3) 2022-01-01 01:01:01.011 +Decimal(9, 1) -99999999.9 +Decimal(18, 2) -999999999.99 +Decimal(38, 3) -999999999.999 +Decimal(76, 4) -999999999.9999 +Float32 -inf +Float32 -inf +Float32 -inf +Float32 -3.4028233e38 +Float32 -1.1754942e-38 +Float32 -1e-45 +Float32 1e-45 +Float32 1.1754942e-38 +Float32 3.4028233e38 +Float32 inf +Float32 inf +Float32 inf +Float32 nan +Float32 nan +Float32 nan +Float64 -inf +Float64 -inf +Float64 -inf +Float64 -1.7976931348623157e308 +Float64 -3.40282347e38 +Float64 -1.1754943499999998e-38 +Float64 -1.3999999999999999e-45 +Float64 -2.2250738585072014e-308 +Float64 2.2250738585072014e-308 +Float64 1.3999999999999999e-45 +Float64 1.1754943499999998e-38 +Float64 3.40282347e38 +Float64 1.7976931348623157e308 +Float64 inf +Float64 inf +Float64 inf +Float64 nan +Float64 nan +Float64 nan +FixedString(1) 1 +FixedString(2) 1\0 +FixedString(10) 1\0\0\0\0\0\0\0\0\0 +IPv4 192.168.0.1 +IPv6 ::1 +Int8 -128 +Int8 -128 +Int8 -127 +Int8 -127 +Int8 -1 +Int8 -1 +Int8 0 +Int8 0 +Int8 1 +Int8 1 +Int8 126 +Int8 126 +Int8 127 +Int8 127 +Int16 -32768 +Int16 -32767 +Int16 -1 +Int16 0 +Int16 1 +Int16 32766 +Int16 32767 +Int32 -2147483648 +Int32 -2147483647 +Int32 -1 +Int32 0 +Int32 1 +Int32 2147483646 +Int32 2147483647 +Int64 -9223372036854775808 +Int64 -9223372036854775807 +Int64 -1 +Int64 0 +Int64 1 +Int64 9223372036854775806 +Int64 9223372036854775807 +Int128 -170141183460469231731687303715884105728 +Int128 -170141183460469231731687303715884105727 +Int128 -1 +Int128 0 +Int128 1 +Int128 170141183460469231731687303715884105726 +Int128 170141183460469231731687303715884105727 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819968 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819967 +Int256 -1 +Int256 0 +Int256 1 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819966 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819967 +IntervalDay 1 +IntervalYear 3 +IntervalMonth 2 +LowCardinality(String) 1 +LowCardinality(String) 1 +LowCardinality(UInt16) 0 +MultiPolygon [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] +Map(Dynamic, Dynamic) {'11':'v1','22':'1'} +Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] +Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] +Point (1.23,4.5600000000000005) +Ring [(1.23,4.5600000000000005),(2.34,5.67)] +String string +SimpleAggregateFunction(anyLast, Array(Int16)) [1,2] +Tuple(Dynamic) ('') +Tuple(Tuple(Dynamic)) (('')) +Tuple(Tuple(Tuple(Dynamic))) (((''))) +UUID 00000000-0000-0000-0000-000000000000 +UUID dededdb6-7835-4ce4-8d11-b5de6f2820e9 +UInt8 0 +UInt8 1 +UInt8 254 +UInt8 255 +UInt16 0 +UInt16 1 +UInt16 65534 +UInt16 65535 +UInt32 0 +UInt32 1 +UInt32 4294967294 +UInt32 4294967295 +UInt64 0 +UInt64 1 +UInt64 18446744073709551614 +UInt64 
18446744073709551615 +UInt128 0 +UInt128 1 +UInt128 340282366920938463463374607431768211454 +UInt128 340282366920938463463374607431768211455 +UInt256 0 +UInt256 1 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639934 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639935 + +Array(Dynamic) [] +Array(Array(Dynamic)) [[]] +Array(Array(Array(Dynamic))) [[[]]] +Bool false +Bool true +Date 2022-01-01 +Date32 2022-01-01 +DateTime 2022-01-01 01:01:01 +DateTime64(3) 2022-01-01 01:01:01.011 +Decimal(9, 1) -99999999.9 +Decimal(18, 2) -999999999.99 +Decimal(38, 3) -999999999.999 +Decimal(76, 4) -999999999.9999 +Float32 -inf +Float32 -inf +Float32 -inf +Float32 -3.4028233e38 +Float32 -1.1754942e-38 +Float32 -1e-45 +Float32 1e-45 +Float32 1.1754942e-38 +Float32 3.4028233e38 +Float32 inf +Float32 inf +Float32 inf +Float32 nan +Float32 nan +Float32 nan +Float64 -inf +Float64 -inf +Float64 -inf +Float64 -1.7976931348623157e308 +Float64 -3.40282347e38 +Float64 -1.1754943499999998e-38 +Float64 -1.3999999999999999e-45 +Float64 -2.2250738585072014e-308 +Float64 2.2250738585072014e-308 +Float64 1.3999999999999999e-45 +Float64 1.1754943499999998e-38 +Float64 3.40282347e38 +Float64 1.7976931348623157e308 +Float64 inf +Float64 inf +Float64 inf +Float64 nan +Float64 nan +Float64 nan +FixedString(1) 1 +FixedString(2) 1\0 +FixedString(10) 1\0\0\0\0\0\0\0\0\0 +IPv4 192.168.0.1 +IPv6 ::1 +Int8 -128 +Int8 -128 +Int8 -127 +Int8 -127 +Int8 -1 +Int8 -1 +Int8 0 +Int8 0 +Int8 1 +Int8 1 +Int8 126 +Int8 126 +Int8 127 +Int8 127 +Int16 -32768 +Int16 -32767 +Int16 -1 +Int16 0 +Int16 1 +Int16 32766 +Int16 32767 +Int32 -2147483648 +Int32 -2147483647 +Int32 -1 +Int32 0 +Int32 1 +Int32 2147483646 +Int32 2147483647 +Int64 -9223372036854775808 +Int64 -9223372036854775807 +Int64 -1 +Int64 0 +Int64 1 +Int64 9223372036854775806 +Int64 9223372036854775807 +Int128 -170141183460469231731687303715884105728 +Int128 -170141183460469231731687303715884105727 +Int128 -1 +Int128 0 +Int128 1 +Int128 170141183460469231731687303715884105726 +Int128 170141183460469231731687303715884105727 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819968 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819967 +Int256 -1 +Int256 0 +Int256 1 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819966 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819967 +IntervalDay 1 +IntervalYear 3 +IntervalMonth 2 +LowCardinality(String) 1 +LowCardinality(String) 1 +LowCardinality(UInt16) 0 +MultiPolygon [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] +Map(Dynamic, Dynamic) {'11':'v1','22':'1'} +Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] +Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] +Point (1.23,4.5600000000000005) +Ring [(1.23,4.5600000000000005),(2.34,5.67)] +String string +SimpleAggregateFunction(anyLast, Array(Int16)) [1,2] +Tuple(Dynamic) ('') +Tuple(Tuple(Dynamic)) (('')) +Tuple(Tuple(Tuple(Dynamic))) (((''))) +UUID 00000000-0000-0000-0000-000000000000 +UUID dededdb6-7835-4ce4-8d11-b5de6f2820e9 +UInt8 0 +UInt8 1 +UInt8 254 +UInt8 255 +UInt16 0 +UInt16 1 +UInt16 65534 +UInt16 65535 +UInt32 0 +UInt32 1 +UInt32 4294967294 +UInt32 4294967295 +UInt64 0 +UInt64 1 +UInt64 18446744073709551614 +UInt64 18446744073709551615 
+UInt128 0 +UInt128 1 +UInt128 340282366920938463463374607431768211454 +UInt128 340282366920938463463374607431768211455 +UInt256 0 +UInt256 1 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639934 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639935 + +48 +48 diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql new file mode 100644 index 00000000000..d302205ca23 --- /dev/null +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql @@ -0,0 +1,95 @@ +-- Tags: no-random-settings + +SET allow_experimental_dynamic_type=1; +SET allow_experimental_object_type=1; +SET allow_experimental_variant_type=1; +SET allow_suspicious_low_cardinality_types=1; + + +CREATE TABLE t (d Dynamic(max_types=255)) ENGINE = Memory; +-- Integer types: signed and unsigned integers (UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256) +INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8); +INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8); +INSERT INTO t VALUES (-32768::Int16), (-32767::Int16), (-1::Int16), (0::Int16), (1::Int16), (32766::Int16), (32767::Int16); +INSERT INTO t VALUES (-2147483648::Int32), (-2147483647::Int32), (-1::Int32), (0::Int32), (1::Int32), (2147483646::Int32), (2147483647::Int32); +INSERT INTO t VALUES (-9223372036854775808::Int64), (-9223372036854775807::Int64), (-1::Int64), (0::Int64), (1::Int64), (9223372036854775806::Int64), (9223372036854775807::Int64); +INSERT INTO t VALUES (-170141183460469231731687303715884105728::Int128), (-170141183460469231731687303715884105727::Int128), (-1::Int128), (0::Int128), (1::Int128), (170141183460469231731687303715884105726::Int128), (170141183460469231731687303715884105727::Int128); +INSERT INTO t VALUES (-57896044618658097711785492504343953926634992332820282019728792003956564819968::Int256), (-57896044618658097711785492504343953926634992332820282019728792003956564819967::Int256), (-1::Int256), (0::Int256), (1::Int256), (57896044618658097711785492504343953926634992332820282019728792003956564819966::Int256), (57896044618658097711785492504343953926634992332820282019728792003956564819967::Int256); + +INSERT INTO t VALUES (0::UInt8), (1::UInt8), (254::UInt8), (255::UInt8); +INSERT INTO t VALUES (0::UInt16), (1::UInt16), (65534::UInt16), (65535::UInt16); +INSERT INTO t VALUES (0::UInt32), (1::UInt32), (4294967294::UInt32), (4294967295::UInt32); +INSERT INTO t VALUES (0::UInt64), (1::UInt64), (18446744073709551614::UInt64), (18446744073709551615::UInt64); +INSERT INTO t VALUES (0::UInt128), (1::UInt128), (340282366920938463463374607431768211454::UInt128), (340282366920938463463374607431768211455::UInt128); +INSERT INTO t VALUES (0::UInt256), (1::UInt256), (115792089237316195423570985008687907853269984665640564039457584007913129639934::UInt256), (115792089237316195423570985008687907853269984665640564039457584007913129639935::UInt256); + +-- Floating-point numbers: floats(Float32 and Float64) and Decimal values +INSERT INTO t VALUES (1.17549435e-38::Float32), (3.40282347e+38::Float32), (-3.40282347e+38::Float32), (-1.17549435e-38::Float32), (1.4e-45::Float32), (-1.4e-45::Float32); +INSERT INTO t VALUES (inf::Float32), (-inf::Float32), (nan::Float32); +INSERT INTO t VALUES (inf::FLOAT(12)), (-inf::FLOAT(12)), (nan::FLOAT(12)); +INSERT INTO t VALUES 
(inf::FLOAT(15,22)), (-inf::FLOAT(15,22)), (nan::FLOAT(15,22)); + +INSERT INTO t VALUES (1.17549435e-38::Float64), (3.40282347e+38::Float64), (-3.40282347e+38::Float64), (-1.17549435e-38::Float64), (1.4e-45::Float64), (-1.4e-45::Float64); +INSERT INTO t VALUES (2.2250738585072014e-308::Float64), (1.7976931348623157e+308::Float64), (-1.7976931348623157e+308::Float64), (-2.2250738585072014e-308::Float64); +INSERT INTO t VALUES (inf::Float64), (-inf::Float64), (nan::Float64); +INSERT INTO t VALUES (inf::DOUBLE(12)), (-inf::DOUBLE(12)), (nan::DOUBLE(12)); +INSERT INTO t VALUES (inf::DOUBLE(15,22)), (-inf::DOUBLE(15,22)), (nan::DOUBLE(15,22)); + +INSERT INTO t VALUES (-99999999.9::Decimal32(1)); +INSERT INTO t VALUES (-999999999.99::Decimal64(2)); +INSERT INTO t VALUES (-999999999.999::Decimal128(3)); +INSERT INTO t VALUES (-999999999.9999::Decimal256(4)); + +-- Strings: String and FixedString +INSERT INTO t VALUES ('string'::String), ('1'::FixedString(1)), ('1'::FixedString(2)), ('1'::FixedString(10)); --(''::String), + +-- Boolean +INSERT INTO t VALUES ('1'::Bool), (0::Bool); + +-- Dates: use Date and Date32 for days, and DateTime and DateTime64 for instances in time +INSERT INTO t VALUES ('2022-01-01'::Date), ('2022-01-01'::Date32), ('2022-01-01 01:01:01'::DateTime), ('2022-01-01 01:01:01.011'::DateTime64); + +-- UUID +INSERT INTO t VALUES ('dededdb6-7835-4ce4-8d11-b5de6f2820e9'::UUID); +INSERT INTO t VALUES ('00000000-0000-0000-0000-000000000000'::UUID); + +-- LowCardinality +INSERT INTO t VALUES ('1'::LowCardinality(String)), ('1'::LowCardinality(String)), (0::LowCardinality(UInt16)); + +-- Arrays +INSERT INTO t VALUES ([]::Array(Dynamic)), ([[]]::Array(Array(Dynamic))), ([[[]]]::Array(Array(Array(Dynamic)))); + +-- Tuple +INSERT INTO t VALUES (()::Tuple(Dynamic)), ((())::Tuple(Tuple(Dynamic))), (((()))::Tuple(Tuple(Tuple(Dynamic)))); + +-- Map. 
+INSERT INTO t VALUES (map(11::Dynamic, 'v1'::Dynamic, '22'::Dynamic, 1::Dynamic)); + +-- SimpleAggregateFunction +INSERT INTO t VALUES ([1,2]::SimpleAggregateFunction(anyLast, Array(Int16))); + +-- IPs +INSERT INTO t VALUES (toIPv4('192.168.0.1')), (toIPv6('::1')); + +-- Geo +INSERT INTO t VALUES ((1.23, 4.56)::Point), (([(1.23, 4.56)::Point, (2.34, 5.67)::Point])::Ring); +INSERT INTO t VALUES ([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]]::MultiPolygon); + +-- Interval +INSERT INTO t VALUES (interval '1' day), (interval '2' month), (interval '3' year); + +-- Nested +INSERT INTO t VALUES ([(1, 'aa'), (2, 'bb')]::Nested(x UInt32, y String)); +INSERT INTO t VALUES ([(1, (2, ['aa', 'bb']), [(3, 'cc'), (4, 'dd')]), (5, (6, ['ee', 'ff']), [(7, 'gg'), (8, 'hh')])]::Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String))); + +SELECT dynamicType(d), d FROM t ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d; + +CREATE TABLE t2 (d Dynamic(max_types=255)) ENGINE = Memory; +INSERT INTO t2 SELECT * FROM t; + +SELECT ''; +SELECT dynamicType(d), d FROM t2 ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d; + +SELECT ''; +SELECT uniqExact(dynamicType(d)) t_ FROM t; +SELECT uniqExact(dynamicType(d)) t_ FROM t2; diff --git a/tests/queries/0_stateless/03160_dynamic_type_agg.reference b/tests/queries/0_stateless/03160_dynamic_type_agg.reference new file mode 100644 index 00000000000..54f6e428839 --- /dev/null +++ b/tests/queries/0_stateless/03160_dynamic_type_agg.reference @@ -0,0 +1 @@ +4950 4950 diff --git a/tests/queries/0_stateless/03160_dynamic_type_agg.sql b/tests/queries/0_stateless/03160_dynamic_type_agg.sql new file mode 100644 index 00000000000..f99232031a8 --- /dev/null +++ b/tests/queries/0_stateless/03160_dynamic_type_agg.sql @@ -0,0 +1,10 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE t (d Dynamic) ENGINE = Memory; + +INSERT INTO t SELECT sumState(number) AS d FROM numbers(100); + +SELECT finalizeAggregation(d.`AggregateFunction(sum, UInt64)`), + sumMerge(d.`AggregateFunction(sum, UInt64)`) +FROM t GROUP BY d.`AggregateFunction(sum, UInt64)`; + diff --git a/tests/queries/0_stateless/03161_cnf_reduction.reference b/tests/queries/0_stateless/03161_cnf_reduction.reference new file mode 100644 index 00000000000..5e39c0f3223 --- /dev/null +++ b/tests/queries/0_stateless/03161_cnf_reduction.reference @@ -0,0 +1,23 @@ +-- Expected plan with analyzer: +SELECT id +FROM `03161_table` +WHERE f +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1 + +-- Expected result with analyzer: +1 + +-- Expected plan w/o analyzer: +SELECT id +FROM `03161_table` +WHERE f +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0 + +-- Expected result w/o analyzer: +1 + +-- Reproducer from the issue with analyzer +2 + +-- Reproducer from the issue w/o analyzer +2 diff --git a/tests/queries/0_stateless/03161_cnf_reduction.sql b/tests/queries/0_stateless/03161_cnf_reduction.sql new file mode 100644 index 00000000000..b34e9171d45 --- /dev/null +++ b/tests/queries/0_stateless/03161_cnf_reduction.sql @@ -0,0 +1,72 @@ +DROP TABLE IF EXISTS 03161_table; + +CREATE TABLE 03161_table (id UInt32, f UInt8) ENGINE = Memory; + +INSERT INTO 03161_table VALUES (0, 0), (1, 1), (2, 0); + +SELECT '-- Expected plan with analyzer:'; + +EXPLAIN SYNTAX +SELECT id +FROM 03161_table +WHERE f AND (NOT(f) OR f) 
+SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1; + +SELECT ''; + +SELECT '-- Expected result with analyzer:'; + +SELECT id +FROM 03161_table +WHERE f AND (NOT(f) OR f) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1; + +SELECT ''; + +SELECT '-- Expected plan w/o analyzer:'; + +EXPLAIN SYNTAX +SELECT id +FROM 03161_table +WHERE f AND (NOT(f) OR f) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0; + +SELECT ''; + +SELECT '-- Expected result w/o analyzer:'; + +SELECT id +FROM 03161_table +WHERE f AND (NOT(f) OR f) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0; + +DROP TABLE IF EXISTS 03161_table; + +-- Checking reproducer from GitHub issue +-- https://github.com/ClickHouse/ClickHouse/issues/57400 + +DROP TABLE IF EXISTS 03161_reproducer; + +CREATE TABLE 03161_reproducer (c0 UInt8, c1 UInt8, c2 UInt8, c3 UInt8, c4 UInt8, c5 UInt8, c6 UInt8, c7 UInt8, c8 UInt8, c9 UInt8) ENGINE = Memory; + +INSERT INTO 03161_reproducer VALUES (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 1), (0, 0, 0, 0, 0, 0, 0, 0, 1, 0), (0, 0, 0, 0, 0, 0, 0, 0, 1, 1), (0, 0, 0, 0, 0, 0, 0, 1, 0, 0), (0, 0, 0, 0, 0, 0, 0, 1, 0, 1), (0, 0, 0, 0, 0, 0, 0, 1, 1, 0), (0, 0, 0, 0, 0, 0, 0, 1, 1, 1); + +SELECT ''; + +SELECT '-- Reproducer from the issue with analyzer'; + +SELECT count() +FROM 03161_reproducer +WHERE ((NOT c2) AND c2 AND (NOT c1)) OR ((NOT c2) AND c3 AND (NOT c5)) OR ((NOT c7) AND (NOT c8)) OR (c9 AND c6 AND c8 AND (NOT c8) AND (NOT c7)) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1; + +SELECT ''; + +SELECT '-- Reproducer from the issue w/o analyzer'; + +SELECT count() +FROM 03161_reproducer +WHERE ((NOT c2) AND c2 AND (NOT c1)) OR ((NOT c2) AND c3 AND (NOT c5)) OR ((NOT c7) AND (NOT c8)) OR (c9 AND c6 AND c8 AND (NOT c8) AND (NOT c7)) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0; + +DROP TABLE IF EXISTS 03161_reproducer; diff --git a/tests/queries/0_stateless/03161_create_table_as_mv.reference b/tests/queries/0_stateless/03161_create_table_as_mv.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03161_create_table_as_mv.sql b/tests/queries/0_stateless/03161_create_table_as_mv.sql new file mode 100644 index 00000000000..e80659ac923 --- /dev/null +++ b/tests/queries/0_stateless/03161_create_table_as_mv.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS base_table; +DROP TABLE IF EXISTS target_table; +DROP TABLE IF EXISTS mv_from_base_to_target; +DROP TABLE IF EXISTS mv_with_storage; +DROP TABLE IF EXISTS other_table_1; +DROP TABLE IF EXISTS other_table_2; + +CREATE TABLE base_table (date DateTime, id String, cost Float64) ENGINE = MergeTree() ORDER BY date; +CREATE TABLE target_table (id String, total AggregateFunction(sum, Float64)) ENGINE = MergeTree() ORDER BY id; +CREATE MATERIALIZED VIEW mv_from_base_to_target TO target_table AS Select id, sumState(cost) FROM base_table GROUP BY id; +CREATE MATERIALIZED VIEW mv_with_storage ENGINE=MergeTree() ORDER BY id AS Select id, sumState(cost) FROM base_table GROUP BY id; + +CREATE TABLE other_table_1 AS mv_with_storage; +CREATE TABLE other_table_2 AS mv_from_base_to_target; -- { serverError INCORRECT_QUERY } diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference 
b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql new file mode 100644 index 00000000000..cd29fae8fd7 --- /dev/null +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -0,0 +1,15 @@ + +DROP TABLE IF EXISTS users; + +CREATE TABLE users ( + uid Int16, + name String, + age Int16, + projection p1 (select count(), age group by age) +) ENGINE = MergeTree order by uid; + +INSERT INTO users VALUES (1231, 'John', 33); +INSERT INTO users VALUES (6666, 'Ksenia', 48); +INSERT INTO users VALUES (8888, 'Alice', 50); + +DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } diff --git a/tests/queries/0_stateless/03162_dynamic_type_nested.reference b/tests/queries/0_stateless/03162_dynamic_type_nested.reference new file mode 100644 index 00000000000..8d5bcb5f85a --- /dev/null +++ b/tests/queries/0_stateless/03162_dynamic_type_nested.reference @@ -0,0 +1,4 @@ + ┌─dynamicType(d)──────────────┬─d─────────────────────────────────────────┬─d.Nested(x UInt32, y Dynamic).x─┬─d.Nested(x UInt32, y Dynamic).y───┬─dynamicType(arrayElement(d.Nested(x UInt32, y Dynamic).y, 1))─┬─d.Nested(x UInt32, y Dynamic).y.String─┬─d.Nested(x UInt32, y Dynamic).y.Tuple(Int64, Array(String))─┐ +1. │ Nested(x UInt32, y Dynamic) │ [(1,'aa'),(2,'bb')] │ [1,2] │ ['aa','bb'] │ String │ ['aa','bb'] │ [(0,[]),(0,[])] │ +2. │ Nested(x UInt32, y Dynamic) │ [(1,(2,['aa','bb'])),(5,(6,['ee','ff']))] │ [1,5] │ [(2,['aa','bb']),(6,['ee','ff'])] │ Tuple(Int64, Array(String)) │ [NULL,NULL] │ [(2,['aa','bb']),(6,['ee','ff'])] │ + └─────────────────────────────┴───────────────────────────────────────────┴─────────────────────────────────┴───────────────────────────────────┴───────────────────────────────────────────────────────────────┴────────────────────────────────────────┴─────────────────────────────────────────────────────────────┘ diff --git a/tests/queries/0_stateless/03162_dynamic_type_nested.sql b/tests/queries/0_stateless/03162_dynamic_type_nested.sql new file mode 100644 index 00000000000..94007459a9e --- /dev/null +++ b/tests/queries/0_stateless/03162_dynamic_type_nested.sql @@ -0,0 +1,16 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE t (d Dynamic) ENGINE = Memory; + +INSERT INTO t VALUES ([(1, 'aa'), (2, 'bb')]::Nested(x UInt32, y Dynamic)) ; +INSERT INTO t VALUES ([(1, (2, ['aa', 'bb'])), (5, (6, ['ee', 'ff']))]::Nested(x UInt32, y Dynamic)); + +SELECT dynamicType(d), + d, + d.`Nested(x UInt32, y Dynamic)`.x, + d.`Nested(x UInt32, y Dynamic)`.y, + dynamicType(d.`Nested(x UInt32, y Dynamic)`.y[1]), + d.`Nested(x UInt32, y Dynamic)`.y.`String`, + d.`Nested(x UInt32, y Dynamic)`.y.`Tuple(Int64, Array(String))` +FROM t ORDER BY d +FORMAT PrettyCompactMonoBlock; diff --git a/tests/queries/0_stateless/03163_dynamic_as_supertype.reference b/tests/queries/0_stateless/03163_dynamic_as_supertype.reference new file mode 100644 index 00000000000..33e3a15c7fb --- /dev/null +++ b/tests/queries/0_stateless/03163_dynamic_as_supertype.reference @@ -0,0 +1,10 @@ +str_0 Dynamic(max_types=3) String +1 Dynamic(max_types=3) UInt64 +str_2 Dynamic(max_types=3) String +3 Dynamic(max_types=3) UInt64 +[1,2,3] Array(Int64) +2020-01-01 Date +str_1 String +str_2 String +42 UInt64 +43 UInt64 diff --git a/tests/queries/0_stateless/03163_dynamic_as_supertype.sql 
b/tests/queries/0_stateless/03163_dynamic_as_supertype.sql new file mode 100644 index 00000000000..baba637eea4 --- /dev/null +++ b/tests/queries/0_stateless/03163_dynamic_as_supertype.sql @@ -0,0 +1,8 @@ +SET allow_experimental_dynamic_type=1; +SELECT if(number % 2, number::Dynamic(max_types=3), ('str_' || toString(number))::Dynamic(max_types=2)) AS d, toTypeName(d), dynamicType(d) FROM numbers(4); +CREATE TABLE dynamic_test_1 (d Dynamic(max_types=3)) ENGINE = Memory; +INSERT INTO dynamic_test_1 VALUES ('str_1'), (42::UInt64); +CREATE TABLE dynamic_test_2 (d Dynamic(max_types=5)) ENGINE = Memory; +INSERT INTO dynamic_test_2 VALUES ('str_2'), (43::UInt64), ('2020-01-01'::Date), ([1, 2, 3]); +SELECT * FROM (SELECT d, dynamicType(d) FROM dynamic_test_1 UNION ALL SELECT d, dynamicType(d) FROM dynamic_test_2) order by d; + diff --git a/tests/queries/0_stateless/data_parquet/native_parquet_reader.parquet b/tests/queries/0_stateless/data_parquet/native_parquet_reader.parquet new file mode 100644 index 00000000000..c0d222342e3 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/native_parquet_reader.parquet differ diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 60384125ec5..8f8d74f39ad 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -6,6 +6,7 @@ AMPLab AMQP ANNIndex ANNIndexes +ANOVA AORM APIs ARMv @@ -29,13 +30,6 @@ Alexey AnyEvent AppleClang Approximative -arrayDotProduct -arrayEnumerateDenseRanked -arrayEnumerateUniqRanked -arrayFirstOrNull -arrayLastOrNull -arrayPartialShuffle -arrayShuffle ArrayJoin ArrowStream AsyncInsertCacheSize @@ -53,8 +47,6 @@ AutoFDO AutoML Autocompletion AvroConfluent -analysisOfVariance -ANOVA BIGINT BIGSERIAL BORO @@ -186,7 +178,6 @@ ComplexKeyCache ComplexKeyDirect ComplexKeyHashed Composable -composable Config ConnectionDetails Const @@ -255,6 +246,7 @@ DockerHub DoubleDelta Doxygen Durre +doesnt ECMA Ecto EdgeAngle @@ -396,8 +388,6 @@ InterserverThreads IsPentagon IsResClassIII IsValid -isNotDistinctFrom -isNullable JBOD JOINed JOINs @@ -466,8 +456,6 @@ KittenHouse Klickhouse Kolmogorov Konstantin -kostik -kostikConsistentHash Korzeniewski Kubernetes LDAP @@ -477,9 +465,8 @@ LLDB LLVM's LOCALTIME LOCALTIMESTAMP -LOONGARCH LONGLONG -LoongArch +LOONGARCH Levenshtein Liao LibFuzzer @@ -497,6 +484,7 @@ LocalThreadActive LogQL Logstash LookML +LoongArch LowCardinality LpDistance LpNorm @@ -571,17 +559,6 @@ MindsDB Mongodb Monotonicity MsgPack -multiSearchAllPositionsCaseInsensitive -multiSearchAllPositionsCaseInsensitiveUTF -multiSearchAnyCaseInsensitive -multiSearchAnyCaseInsensitiveUTF -multiSearchAnyUTF -multiSearchFirstIndexCaseInsensitive -multiSearchFirstIndexCaseInsensitiveUTF -multiSearchFirstIndexUTF -multiSearchFirstPositionCaseInsensitive -multiSearchFirstPositionCaseInsensitiveUTF -multiSearchFirstPositionUTF MultiPolygon Multiline Multiqueries @@ -683,8 +660,8 @@ OSUserTimeNormalized OTLP OUTFILE ObjectId -Observability Oblakov +Observability Octonica Ok OnTime @@ -885,7 +862,6 @@ Simhash SimpleAggregateFunction SimpleState SipHash -sigmoid Smirnov's Smirnov'test Soundex @@ -931,7 +907,6 @@ TAVG TCPConnection TCPThreads TDigest -ThreadMonotonic TINYINT TLSv TMAX @@ -957,7 +932,6 @@ TablesLoaderForegroundThreads TablesLoaderForegroundThreadsActive TablesToDropQueueSize TargetSpecific -tanh Telegraf TemplateIgnoreSpaces TemporaryFilesForAggregation @@ -967,6 +941,7 @@ TemporaryFilesUnknown 
Testflows Tgz Theil's +ThreadMonotonic ThreadPoolFSReaderThreads ThreadPoolFSReaderThreadsActive ThreadPoolRemoteFSReaderThreads @@ -1027,7 +1002,6 @@ UncompressedCacheBytes UncompressedCacheCells UnidirectionalEdgeIsValid UniqThetaSketch -unshuffled Updatable Uppercased Uptime @@ -1094,6 +1068,7 @@ activerecord addDate addDays addHours +addInterval addMicroseconds addMilliseconds addMinutes @@ -1101,10 +1076,9 @@ addMonths addNanoseconds addQuarters addSeconds +addTupleOfIntervals addWeeks addYears -addInterval -addTupleOfIntervals addr addressToLine addressToLineWithInlines @@ -1119,6 +1093,7 @@ aiochclient allocator alphaTokens amplab +analysisOfVariance analytics anonymize anonymized @@ -1146,15 +1121,19 @@ arrayCumSum arrayCumSumNonNegative arrayDifference arrayDistinct +arrayDotProduct arrayElement arrayEnumerate arrayEnumerateDense +arrayEnumerateDenseRanked arrayEnumerateUniq +arrayEnumerateUniqRanked arrayExists arrayFill arrayFilter arrayFirst arrayFirstIndex +arrayFirstOrNull arrayFlatten arrayFold arrayIntersect @@ -1162,10 +1141,12 @@ arrayJaccardIndex arrayJoin arrayLast arrayLastIndex +arrayLastOrNull arrayMap arrayMax arrayMin arrayPartialReverseSort +arrayPartialShuffle arrayPartialSort arrayPopBack arrayPopFront @@ -1185,6 +1166,7 @@ arrayRotateRight arrayShiftLeft arrayShiftRight arrayShingles +arrayShuffle arraySlice arraySort arraySplit @@ -1366,6 +1348,7 @@ collapsingmergetree combinator combinators comparising +composable compressability concat concatAssumeInjective @@ -1635,6 +1618,8 @@ gcem generateRandom generateRandomStructure generateSeries +generateSnowflakeID +generateSnowflakeIDThreadMonotonic generateULID generateUUIDv geoDistance @@ -1727,8 +1712,8 @@ hasSubsequenceCaseInsensitive hasSubsequenceCaseInsensitiveUTF hasSubsequenceUTF hasSubstr -hasToken hasThreadFuzzer +hasToken hasTokenCaseInsensitive hasTokenCaseInsensitiveOrNull hasTokenOrNull @@ -1801,8 +1786,10 @@ isIPAddressInRange isIPv isInfinite isNaN +isNotDistinctFrom isNotNull isNull +isNullable isValidJSON isValidUTF isZeroOrNull @@ -1854,6 +1841,8 @@ kolmogorovSmirnovTest kolmogorovsmirnovtest kolya konsole +kostik +kostikConsistentHash kurtPop kurtSamp kurtosis @@ -1865,9 +1854,9 @@ laravel largestTriangleThreeBuckets latencies ldap -leftUTF leftPad leftPadUTF +leftUTF lemmatization lemmatize lemmatized @@ -1914,8 +1903,8 @@ logTrace logagent loghouse london -loongarch lookups +loongarch lowcardinality lowerUTF lowercased @@ -1986,8 +1975,8 @@ mispredictions mmap mmapped modularization -moduloOrZero moduli +moduloOrZero mongodb monotonicity monthName @@ -2004,10 +1993,21 @@ multiMatchAllIndices multiMatchAny multiMatchAnyIndex multiSearchAllPositions +multiSearchAllPositionsCaseInsensitive +multiSearchAllPositionsCaseInsensitiveUTF multiSearchAllPositionsUTF multiSearchAny +multiSearchAnyCaseInsensitive +multiSearchAnyCaseInsensitiveUTF +multiSearchAnyUTF multiSearchFirstIndex +multiSearchFirstIndexCaseInsensitive +multiSearchFirstIndexCaseInsensitiveUTF +multiSearchFirstIndexUTF multiSearchFirstPosition +multiSearchFirstPositionCaseInsensitive +multiSearchFirstPositionCaseInsensitiveUTF +multiSearchFirstPositionUTF multibyte multidirectory multiline @@ -2093,6 +2093,7 @@ ok omclickhouse onstraints ontime +onwards openSSL openSUSE openldap @@ -2204,6 +2205,7 @@ procfs profiler proleptic prometheus +proportionsZTest proto protobuf protobufsingle @@ -2342,8 +2344,8 @@ retentions rethrow retransmit retriable -rewritable reverseUTF +rewritable rightPad rightPadUTF rightUTF @@ -2403,8 +2405,9 @@ 
sharded sharding shortcircuit shortkeys -showCertificate shoutout +showCertificate +sigmoid simdjson simpleJSON simpleJSONExtractBool @@ -2418,8 +2421,8 @@ simpleLinearRegression simpleaggregatefunction simplelinearregression simpod -singlepart singleValueOrNull +singlepart singlevalueornull sinh sipHash @@ -2464,13 +2467,13 @@ statbox stateful stddev stddevPop -stddevSamp -stddevpop -stddevsamp -stddevpopstable stddevPopStable -stddevsampstable +stddevSamp stddevSampStable +stddevpop +stddevpopstable +stddevsamp +stddevsampstable stderr stdin stdout @@ -2531,6 +2534,7 @@ substrings subtitiles subtractDays subtractHours +subtractInterval subtractMicroseconds subtractMilliseconds subtractMinutes @@ -2538,10 +2542,9 @@ subtractMonths subtractNanoseconds subtractQuarters subtractSeconds +subtractTupleOfIntervals subtractWeeks subtractYears -subtractInterval -subtractTupleOfIntervals subtree subtrees subtype @@ -2550,13 +2553,13 @@ sumCount sumKahan sumMap sumMapFiltered +sumMapFilteredWithOverflow +sumMapWithOverflow sumWithOverflow sumcount sumkahan summap summapwithoverflow -sumMapWithOverflow -sumMapFilteredWithOverflow summingmergetree sumwithoverflow superaggregates @@ -2579,6 +2582,7 @@ tabseparatedrawwithnames tabseparatedrawwithnamesandtypes tabseparatedwithnames tabseparatedwithnamesandtypes +tanh tcp tcpPort tcpnodelay @@ -2713,18 +2717,18 @@ tupleDivide tupleDivideByNumber tupleElement tupleHammingDistance +tupleIntDiv +tupleIntDivByNumber +tupleIntDivOrZero +tupleIntDivOrZeroByNumber tupleMinus +tupleModulo +tupleModuloByNumber tupleMultiply tupleMultiplyByNumber tupleNegate tuplePlus tupleToNameValuePairs -tupleIntDiv -tupleIntDivByNumber -tupleIntDivOrZero -tupleIntDivOrZeroByNumber -tupleModulo -tupleModuloByNumber turbostat txt typename @@ -2763,10 +2767,12 @@ unixODBC unixodbc unoptimized unparsed +unpooled unrealiable unreplicated unresolvable unrounded +unshuffled untracked untrusted untuple @@ -2777,8 +2783,8 @@ uptime uptrace uring url -urlencoded urlCluster +urlencoded urls usearch userspace diff --git a/utils/keeper-bench/CMakeLists.txt b/utils/keeper-bench/CMakeLists.txt index 5514c34f4ef..4fe0d852fd2 100644 --- a/utils/keeper-bench/CMakeLists.txt +++ b/utils/keeper-bench/CMakeLists.txt @@ -4,5 +4,4 @@ if (NOT TARGET ch_contrib::rapidjson) endif () clickhouse_add_executable(keeper-bench Generator.cpp Runner.cpp Stats.cpp main.cpp) -target_link_libraries(keeper-bench PRIVATE dbms) -target_link_libraries(keeper-bench PRIVATE ch_contrib::rapidjson) +target_link_libraries(keeper-bench PRIVATE dbms clickhouse_functions ch_contrib::rapidjson) diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index 2212f7158ae..cbf1bcdae23 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -40,54 +40,6 @@ std::string generateRandomString(size_t length) } } -void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & path) -{ - namespace fs = std::filesystem; - - auto promise = std::make_shared>(); - auto future = promise->get_future(); - - Strings children; - auto list_callback = [promise, &children] (const ListResponse & response) - { - children = response.names; - - promise->set_value(); - }; - zookeeper.list(path, ListRequestType::ALL, list_callback, nullptr); - future.get(); - - while (!children.empty()) - { - Coordination::Requests ops; - for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i) - { - removeRecursive(zookeeper, fs::path(path) / children.back()); - 
ops.emplace_back(makeRemoveRequest(fs::path(path) / children.back(), -1)); - children.pop_back(); - } - auto multi_promise = std::make_shared>(); - auto multi_future = multi_promise->get_future(); - - auto multi_callback = [multi_promise] (const MultiResponse &) - { - multi_promise->set_value(); - }; - zookeeper.multi(ops, multi_callback); - multi_future.get(); - } - auto remove_promise = std::make_shared>(); - auto remove_future = remove_promise->get_future(); - - auto remove_callback = [remove_promise] (const RemoveResponse &) - { - remove_promise->set_value(); - }; - - zookeeper.remove(path, -1, remove_callback); - remove_future.get(); -} - NumberGetter NumberGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, std::optional default_value) { @@ -603,148 +555,16 @@ Generator::Generator(const Poco::Util::AbstractConfiguration & config) acl.id = "anyone"; default_acls.emplace_back(std::move(acl)); - static const std::string generator_key = "generator"; - - std::cerr << "---- Parsing setup ---- " << std::endl; - static const std::string setup_key = generator_key + ".setup"; - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(setup_key, keys); - for (const auto & key : keys) - { - if (key.starts_with("node")) - { - auto node_key = setup_key + "." + key; - auto parsed_root_node = parseNode(node_key, config); - const auto node = root_nodes.emplace_back(parsed_root_node); - - if (config.has(node_key + ".repeat")) - { - if (!node->name.isRandom()) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key); - - auto repeat_count = config.getUInt64(node_key + ".repeat"); - node->repeat_count = repeat_count; - for (size_t i = 1; i < repeat_count; ++i) - root_nodes.emplace_back(node->clone()); - } - - std::cerr << "Tree to create:" << std::endl; - - node->dumpTree(); - std::cerr << std::endl; - } - } - std::cerr << "---- Done parsing data setup ----\n" << std::endl; - std::cerr << "---- Collecting request generators ----" << std::endl; - static const std::string requests_key = generator_key + ".requests"; + static const std::string requests_key = "generator.requests"; request_getter = RequestGetter::fromConfig(requests_key, config); std::cerr << request_getter.description() << std::endl; std::cerr << "---- Done collecting request generators ----\n" << std::endl; } -std::shared_ptr Generator::parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config) -{ - auto node = std::make_shared(); - node->name = StringGetter::fromConfig(key + ".name", config); - - if (config.has(key + ".data")) - node->data = StringGetter::fromConfig(key + ".data", config); - - Poco::Util::AbstractConfiguration::Keys node_keys; - config.keys(key, node_keys); - - for (const auto & node_key : node_keys) - { - if (!node_key.starts_with("node")) - continue; - - const auto node_key_string = key + "." 
+ node_key; - auto child_node = parseNode(node_key_string, config); - node->children.push_back(child_node); - - if (config.has(node_key_string + ".repeat")) - { - if (!child_node->name.isRandom()) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key_string); - - auto repeat_count = config.getUInt64(node_key_string + ".repeat"); - child_node->repeat_count = repeat_count; - for (size_t i = 1; i < repeat_count; ++i) - node->children.push_back(child_node); - } - } - - return node; -} - -void Generator::Node::dumpTree(int level) const -{ - std::string data_string - = data.has_value() ? fmt::format("{}", data->description()) : "no data"; - - std::string repeat_count_string = repeat_count != 0 ? fmt::format(", repeated {} times", repeat_count) : ""; - - std::cerr << fmt::format("{}name: {}, data: {}{}", std::string(level, '\t'), name.description(), data_string, repeat_count_string) << std::endl; - - for (auto it = children.begin(); it != children.end();) - { - const auto & child = *it; - child->dumpTree(level + 1); - std::advance(it, child->repeat_count != 0 ? child->repeat_count : 1); - } -} - -std::shared_ptr Generator::Node::clone() const -{ - auto new_node = std::make_shared(); - new_node->name = name; - new_node->data = data; - new_node->repeat_count = repeat_count; - - // don't do deep copy of children because we will do clone only for root nodes - new_node->children = children; - - return new_node; -} - -void Generator::Node::createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const -{ - auto path = std::filesystem::path(parent_path) / name.getString(); - auto promise = std::make_shared>(); - auto future = promise->get_future(); - auto create_callback = [promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - promise->set_value(); - }; - zookeeper.create(path, data ? 
data->getString() : "", false, false, acls, create_callback); - future.get(); - - for (const auto & child : children) - child->createNode(zookeeper, path, acls); -} - void Generator::startup(Coordination::ZooKeeper & zookeeper) { - std::cerr << "---- Creating test data ----" << std::endl; - for (const auto & node : root_nodes) - { - auto node_name = node->name.getString(); - node->name.setString(node_name); - - std::string root_path = std::filesystem::path("/") / node_name; - std::cerr << "Cleaning up " << root_path << std::endl; - removeRecursive(zookeeper, root_path); - - node->createNode(zookeeper, "/", default_acls); - } - std::cerr << "---- Created test data ----\n" << std::endl; - std::cerr << "---- Initializing generators ----" << std::endl; - request_getter.startup(zookeeper); } @@ -752,15 +572,3 @@ Coordination::ZooKeeperRequestPtr Generator::generate() { return request_getter.getRequestGenerator()->generate(default_acls); } - -void Generator::cleanup(Coordination::ZooKeeper & zookeeper) -{ - std::cerr << "---- Cleaning up test data ----" << std::endl; - for (const auto & node : root_nodes) - { - auto node_name = node->name.getString(); - std::string root_path = std::filesystem::path("/") / node_name; - std::cerr << "Cleaning up " << root_path << std::endl; - removeRecursive(zookeeper, root_path); - } -} diff --git a/utils/keeper-bench/Generator.h b/utils/keeper-bench/Generator.h index 5b4c05b2d8b..35dce1a95d9 100644 --- a/utils/keeper-bench/Generator.h +++ b/utils/keeper-bench/Generator.h @@ -173,27 +173,9 @@ public: void startup(Coordination::ZooKeeper & zookeeper); Coordination::ZooKeeperRequestPtr generate(); - void cleanup(Coordination::ZooKeeper & zookeeper); private: - struct Node - { - StringGetter name; - std::optional data; - std::vector> children; - size_t repeat_count = 0; - - std::shared_ptr clone() const; - - void createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const; - void dumpTree(int level = 0) const; - }; - - static std::shared_ptr parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config); std::uniform_int_distribution request_picker; - std::vector> root_nodes; RequestGetter request_getter; Coordination::ACLs default_acls; }; - -std::optional getGenerator(const std::string & name); diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index a4b579f1f7b..ed7e09685f0 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1,14 +1,31 @@ #include "Runner.h" +#include #include -#include "Common/ZooKeeper/ZooKeeperCommon.h" -#include "Common/ZooKeeper/ZooKeeperConstants.h" -#include -#include -#include "IO/ReadBufferFromString.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace CurrentMetrics @@ -22,23 +39,43 @@ namespace DB::ErrorCodes { extern const int CANNOT_BLOCK_SIGNAL; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } Runner::Runner( std::optional concurrency_, const std::string & config_path, + const std::string & input_request_log_, + const std::string & setup_nodes_snapshot_path_, const Strings & hosts_strings_, std::optional max_time_, std::optional delay_, std::optional continue_on_error_, std::optional max_iterations_) - : info(std::make_shared()) + : input_request_log(input_request_log_) + , 
setup_nodes_snapshot_path(setup_nodes_snapshot_path_) + , info(std::make_shared()) { DB::ConfigProcessor config_processor(config_path, true, false); - auto config = config_processor.loadConfig().configuration; + DB::ConfigurationPtr config = nullptr; + + if (!config_path.empty()) + { + config = config_processor.loadConfig().configuration; + + if (config->has("generator")) + generator.emplace(*config); + } + else + { + if (input_request_log.empty()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Both --config and --input_request_log cannot be empty"); + + if (!std::filesystem::exists(input_request_log)) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "File on path {} does not exist", input_request_log); + } - generator.emplace(*config); if (!hosts_strings_.empty()) { @@ -57,6 +94,8 @@ Runner::Runner( static constexpr uint64_t DEFAULT_CONCURRENCY = 1; if (concurrency_) concurrency = *concurrency_; + else if (!config) + concurrency = DEFAULT_CONCURRENCY; else concurrency = config->getUInt64("concurrency", DEFAULT_CONCURRENCY); std::cerr << "Concurrency: " << concurrency << std::endl; @@ -64,6 +103,8 @@ Runner::Runner( static constexpr uint64_t DEFAULT_ITERATIONS = 0; if (max_iterations_) max_iterations = *max_iterations_; + else if (!config) + max_iterations = DEFAULT_ITERATIONS; else max_iterations = config->getUInt64("iterations", DEFAULT_ITERATIONS); std::cerr << "Iterations: " << max_iterations << std::endl; @@ -71,6 +112,8 @@ Runner::Runner( static constexpr double DEFAULT_DELAY = 1.0; if (delay_) delay = *delay_; + else if (!config) + delay = DEFAULT_DELAY; else delay = config->getDouble("report_delay", DEFAULT_DELAY); std::cerr << "Report delay: " << delay << std::endl; @@ -78,44 +121,48 @@ Runner::Runner( static constexpr double DEFAULT_TIME_LIMIT = 0.0; if (max_time_) max_time = *max_time_; + else if (!config) + max_time = DEFAULT_TIME_LIMIT; else max_time = config->getDouble("timelimit", DEFAULT_TIME_LIMIT); std::cerr << "Time limit: " << max_time << std::endl; if (continue_on_error_) continue_on_error = *continue_on_error_; + else if (!config) + continue_on_error_ = false; else continue_on_error = config->getBool("continue_on_error", false); std::cerr << "Continue on error: " << continue_on_error << std::endl; - static const std::string output_key = "output"; - print_to_stdout = config->getBool(output_key + ".stdout", false); - std::cerr << "Printing output to stdout: " << print_to_stdout << std::endl; - - static const std::string output_file_key = output_key + ".file"; - if (config->has(output_file_key)) + if (config) { - if (config->has(output_file_key + ".path")) - { - file_output = config->getString(output_file_key + ".path"); - output_file_with_timestamp = config->getBool(output_file_key + ".with_timestamp"); - } - else - file_output = config->getString(output_file_key); + benchmark_context.initializeFromConfig(*config); - std::cerr << "Result file path: " << file_output->string() << std::endl; + static const std::string output_key = "output"; + print_to_stdout = config->getBool(output_key + ".stdout", false); + std::cerr << "Printing output to stdout: " << print_to_stdout << std::endl; + + static const std::string output_file_key = output_key + ".file"; + if (config->has(output_file_key)) + { + if (config->has(output_file_key + ".path")) + { + file_output = config->getString(output_file_key + ".path"); + output_file_with_timestamp = config->getBool(output_file_key + ".with_timestamp"); + } + else + file_output = config->getString(output_file_key); + + std::cerr << 
"Result file path: " << file_output->string() << std::endl; + } } std::cerr << "---- Run options ----\n" << std::endl; - - pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, concurrency); - queue.emplace(concurrency); } void Runner::parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config) { - ConnectionInfo default_connection_info; - const auto fill_connection_details = [&](const std::string & key, auto & connection_info) { if (config.has(key + ".secure")) @@ -328,9 +375,770 @@ bool Runner::tryPushRequestInteractively(Coordination::ZooKeeperRequestPtr && re void Runner::runBenchmark() { + if (generator) + runBenchmarkWithGenerator(); + else + runBenchmarkFromLog(); +} + + +struct ZooKeeperRequestBlock +{ + explicit ZooKeeperRequestBlock(DB::Block block_) + : block(std::move(block_)) + , hostname_idx(block.getPositionByName("hostname")) + , request_event_time_idx(block.getPositionByName("request_event_time")) + , thread_id_idx(block.getPositionByName("thread_id")) + , session_id_idx(block.getPositionByName("session_id")) + , xid_idx(block.getPositionByName("xid")) + , has_watch_idx(block.getPositionByName("has_watch")) + , op_num_idx(block.getPositionByName("op_num")) + , path_idx(block.getPositionByName("path")) + , data_idx(block.getPositionByName("data")) + , is_ephemeral_idx(block.getPositionByName("is_ephemeral")) + , is_sequential_idx(block.getPositionByName("is_sequential")) + , response_event_time_idx(block.getPositionByName("response_event_time")) + , error_idx(block.getPositionByName("error")) + , requests_size_idx(block.getPositionByName("requests_size")) + , version_idx(block.getPositionByName("version")) + {} + + size_t rows() const + { + return block.rows(); + } + + UInt64 getExecutorId(size_t row) const + { + return getSessionId(row); + } + + std::string getHostname(size_t row) const + { + return getField(hostname_idx, row).safeGet(); + } + + UInt64 getThreadId(size_t row) const + { + return getField(thread_id_idx, row).safeGet(); + } + + DB::DateTime64 getRequestEventTime(size_t row) const + { + return getField(request_event_time_idx, row).safeGet(); + } + + DB::DateTime64 getResponseEventTime(size_t row) const + { + return getField(response_event_time_idx, row).safeGet(); + } + + Int64 getSessionId(size_t row) const + { + return getField(session_id_idx, row).safeGet(); + } + + Int64 getXid(size_t row) const + { + return getField(xid_idx, row).safeGet(); + } + + bool hasWatch(size_t row) const + { + return getField(has_watch_idx, row).safeGet(); + } + + Coordination::OpNum getOpNum(size_t row) const + { + return static_cast(getField(op_num_idx, row).safeGet()); + } + + bool isEphemeral(size_t row) const + { + return getField(is_ephemeral_idx, row).safeGet(); + } + + bool isSequential(size_t row) const + { + return getField(is_sequential_idx, row).safeGet(); + } + + std::string getPath(size_t row) const + { + return getField(path_idx, row).safeGet(); + } + + std::string getData(size_t row) const + { + return getField(data_idx, row).safeGet(); + } + + UInt64 getRequestsSize(size_t row) const + { + return getField(requests_size_idx, row).safeGet(); + } + + std::optional getVersion(size_t row) const + { + auto field = getField(version_idx, row); + if (field.isNull()) + return std::nullopt; + return static_cast(field.safeGet()); + } + + std::optional getError(size_t row) const + { + auto field = getField(error_idx, row); + if (field.isNull()) + return std::nullopt; + + return 
static_cast(field.safeGet()); + } +private: + DB::Field getField(size_t position, size_t row) const + { + DB::Field field; + block.getByPosition(position).column->get(row, field); + return field; + } + + DB::Block block; + size_t hostname_idx = 0; + size_t request_event_time_idx = 0; + size_t thread_id_idx = 0; + size_t session_id_idx = 0; + size_t xid_idx = 0; + size_t has_watch_idx = 0; + size_t op_num_idx = 0; + size_t path_idx = 0; + size_t data_idx = 0; + size_t is_ephemeral_idx = 0; + size_t is_sequential_idx = 0; + size_t response_event_time_idx = 0; + size_t error_idx = 0; + size_t requests_size_idx = 0; + size_t version_idx = 0; +}; + +struct RequestFromLog +{ + Coordination::ZooKeeperRequestPtr request; + std::optional expected_result; + std::vector> subrequest_expected_results; + int64_t session_id = 0; + size_t executor_id = 0; + bool has_watch = false; + DB::DateTime64 request_event_time; + DB::DateTime64 response_event_time; + std::shared_ptr connection; +}; + +struct ZooKeeperRequestFromLogReader +{ + ZooKeeperRequestFromLogReader(const std::string & input_request_log, DB::ContextPtr context) + { + std::optional format_settings; + + file_read_buf = std::make_unique(input_request_log); + auto compression_method = DB::chooseCompressionMethod(input_request_log, ""); + file_read_buf = DB::wrapReadBufferWithCompressionMethod(std::move(file_read_buf), compression_method); + + DB::SingleReadBufferIterator read_buffer_iterator(std::move(file_read_buf)); + auto [columns_description, format] = DB::detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); + + DB::ColumnsWithTypeAndName columns; + columns.reserve(columns_description.size()); + + for (const auto & column_description : columns_description) + columns.push_back(DB::ColumnWithTypeAndName{column_description.type, column_description.name}); + + header_block = std::move(columns); + + file_read_buf + = DB::wrapReadBufferWithCompressionMethod(std::make_unique(input_request_log), compression_method); + + input_format = DB::FormatFactory::instance().getInput( + format, + *file_read_buf, + header_block, + context, + context->getSettingsRef().max_block_size, + format_settings, + 1, + std::nullopt, + /*is_remote_fs*/ false, + DB::CompressionMethod::None, + false); + + Coordination::ACL acl; + acl.permissions = Coordination::ACL::All; + acl.scheme = "world"; + acl.id = "anyone"; + default_acls.emplace_back(std::move(acl)); + } + + std::optional getNextRequest(bool for_multi = false) + { + RequestFromLog request_from_log; + + if (!current_block) + { + auto chunk = input_format->generate(); + + if (chunk.empty()) + return std::nullopt; + + current_block.emplace(header_block.cloneWithColumns(chunk.detachColumns())); + idx_in_block = 0; + } + + request_from_log.expected_result = current_block->getError(idx_in_block); + request_from_log.session_id = current_block->getSessionId(idx_in_block); + request_from_log.has_watch = current_block->hasWatch(idx_in_block); + request_from_log.executor_id = current_block->getExecutorId(idx_in_block); + request_from_log.request_event_time = current_block->getRequestEventTime(idx_in_block); + request_from_log.response_event_time = current_block->getResponseEventTime(idx_in_block); + + const auto move_row_iterator = [&] + { + if (idx_in_block == current_block->rows() - 1) + current_block.reset(); + else + ++idx_in_block; + }; + + auto op_num = current_block->getOpNum(idx_in_block); + switch (op_num) + { + case Coordination::OpNum::Create: + { + auto create_request = std::make_shared(); + 
create_request->path = current_block->getPath(idx_in_block); + create_request->data = current_block->getData(idx_in_block); + create_request->is_ephemeral = current_block->isEphemeral(idx_in_block); + create_request->is_sequential = current_block->isSequential(idx_in_block); + request_from_log.request = create_request; + break; + } + case Coordination::OpNum::Set: + { + auto set_request = std::make_shared(); + set_request->path = current_block->getPath(idx_in_block); + set_request->data = current_block->getData(idx_in_block); + if (auto version = current_block->getVersion(idx_in_block)) + { + /// we just need to make sure that the request with version that need to fail, fail when replaying + if (request_from_log.expected_result == Coordination::Error::ZBADVERSION) + set_request->version = std::numeric_limits::max(); + } + request_from_log.request = set_request; + break; + } + case Coordination::OpNum::Remove: + { + auto remove_request = std::make_shared(); + remove_request->path = current_block->getPath(idx_in_block); + if (auto version = current_block->getVersion(idx_in_block)) + { + /// we just need to make sure that the request with version that need to fail, fail when replaying + if (request_from_log.expected_result == Coordination::Error::ZBADVERSION) + remove_request->version = std::numeric_limits::max(); + } + request_from_log.request = remove_request; + break; + } + case Coordination::OpNum::Check: + case Coordination::OpNum::CheckNotExists: + { + auto check_request = std::make_shared(); + check_request->path = current_block->getPath(idx_in_block); + if (auto version = current_block->getVersion(idx_in_block)) + { + /// we just need to make sure that the request with version that need to fail, fail when replaying + if (request_from_log.expected_result == Coordination::Error::ZBADVERSION) + check_request->version = std::numeric_limits::max(); + } + if (op_num == Coordination::OpNum::CheckNotExists) + check_request->not_exists = true; + request_from_log.request = check_request; + break; + } + case Coordination::OpNum::Sync: + { + auto sync_request = std::make_shared(); + sync_request->path = current_block->getPath(idx_in_block); + request_from_log.request = sync_request; + break; + } + case Coordination::OpNum::Get: + { + auto get_request = std::make_shared(); + get_request->path = current_block->getPath(idx_in_block); + request_from_log.request = get_request; + break; + } + case Coordination::OpNum::SimpleList: + case Coordination::OpNum::FilteredList: + { + auto list_request = std::make_shared(); + list_request->path = current_block->getPath(idx_in_block); + request_from_log.request = list_request; + break; + } + case Coordination::OpNum::Exists: + { + auto exists_request = std::make_shared(); + exists_request->path = current_block->getPath(idx_in_block); + request_from_log.request = exists_request; + break; + } + case Coordination::OpNum::Multi: + case Coordination::OpNum::MultiRead: + { + if (for_multi) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Nested multi requests are not allowed"); + + auto requests_size = current_block->getRequestsSize(idx_in_block); + + Coordination::Requests requests; + requests.reserve(requests_size); + move_row_iterator(); + + for (size_t i = 0; i < requests_size; ++i) + { + auto subrequest_from_log = getNextRequest(/*for_multi=*/true); + if (!subrequest_from_log) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Failed to fetch subrequest for {}, subrequest index {}", op_num, i); + + if (!subrequest_from_log->expected_result && 
request_from_log.expected_result + && request_from_log.expected_result == Coordination::Error::ZOK) + { + subrequest_from_log->expected_result = Coordination::Error::ZOK; + } + + requests.push_back(std::move(subrequest_from_log->request)); + + if (subrequest_from_log->session_id != request_from_log.session_id) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Session id mismatch for subrequest in {}, subrequest index {}", op_num, i); + + if (subrequest_from_log->executor_id != request_from_log.executor_id) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Executor id mismatch for subrequest in {}, subrequest index {}", op_num, i); + + request_from_log.subrequest_expected_results.push_back(subrequest_from_log->expected_result); + } + + request_from_log.request = std::make_shared(requests, default_acls); + + return request_from_log; + } + default: + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unsupported operation {} ({})", op_num, static_cast(op_num)); + } + + move_row_iterator(); + + return request_from_log; + } + +private: + DB::Block header_block; + + std::unique_ptr file_read_buf; + DB::InputFormatPtr input_format; + + std::optional current_block; + size_t idx_in_block = 0; + + Coordination::ACLs default_acls; +}; + + +namespace +{ + +struct RequestFromLogStats +{ + struct Stats + { + std::atomic total = 0; + std::atomic unexpected_results = 0; + }; + + Stats write_requests; + Stats read_requests; +}; + +struct SetupNodeCollector +{ + explicit SetupNodeCollector(const std::string & setup_nodes_snapshot_path) + { + if (setup_nodes_snapshot_path.empty()) + return; + + keeper_context = std::make_shared(true, std::make_shared()); + keeper_context->setDigestEnabled(true); + keeper_context->setSnapshotDisk( + std::make_shared("Keeper-snapshots", setup_nodes_snapshot_path)); + + snapshot_manager.emplace(1, keeper_context); + auto snapshot_result = snapshot_manager->restoreFromLatestSnapshot(); + if (snapshot_result.storage == nullptr) + { + std::cerr << "No initial snapshot found" << std::endl; + initial_storage = std::make_unique( + /* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); + initial_storage->initializeSystemNodes(); + } + else + { + std::cerr << "Loaded initial nodes from snapshot" << std::endl; + initial_storage = std::move(snapshot_result.storage); + } + } + + void processRequest(const RequestFromLog & request_from_log) + { + if (!request_from_log.expected_result.has_value()) + return; + + + auto process_request = [&](const Coordination::ZooKeeperRequest & request, const auto expected_result) + { + const auto & path = request.getPath(); + + if (nodes_created_during_replay.contains(path)) + return; + + auto op_num = request.getOpNum(); + + if (op_num == Coordination::OpNum::Create) + { + if (expected_result == Coordination::Error::ZNODEEXISTS) + { + addExpectedNode(path); + } + else if (expected_result == Coordination::Error::ZOK) + { + nodes_created_during_replay.insert(path); + /// we need to make sure ancestors exist + auto position = path.find_last_of('/'); + if (position != 0) + { + auto parent_path = path.substr(0, position); + addExpectedNode(parent_path); + } + } + } + else if (op_num == Coordination::OpNum::Remove) + { + if (expected_result == Coordination::Error::ZOK || expected_result == Coordination::Error::ZBADVERSION) + addExpectedNode(path); + } + else if (op_num == Coordination::OpNum::Set) + { + if (expected_result == Coordination::Error::ZOK || expected_result == Coordination::Error::ZBADVERSION) + 
addExpectedNode(path); + } + else if (op_num == Coordination::OpNum::Check) + { + if (expected_result == Coordination::Error::ZOK || expected_result == Coordination::Error::ZBADVERSION) + addExpectedNode(path); + } + else if (op_num == Coordination::OpNum::CheckNotExists) + { + if (expected_result == Coordination::Error::ZNODEEXISTS || expected_result == Coordination::Error::ZBADVERSION) + addExpectedNode(path); + } + else if (request.isReadRequest()) + { + if (expected_result == Coordination::Error::ZOK) + addExpectedNode(path); + } + }; + + const auto & request = request_from_log.request; + if (request->getOpNum() == Coordination::OpNum::Multi || request->getOpNum() == Coordination::OpNum::MultiRead) + { + const auto & multi_request = dynamic_cast(*request); + const auto & subrequests = multi_request.requests; + + for (size_t i = 0; i < subrequests.size(); ++i) + { + const auto & zookeeper_request = dynamic_cast(*subrequests[i]); + const auto subrequest_expected_result = request_from_log.subrequest_expected_results[i]; + if (subrequest_expected_result.has_value()) + process_request(zookeeper_request, *subrequest_expected_result); + + } + } + else + process_request(*request, *request_from_log.expected_result); + } + + void addExpectedNode(const std::string & path) + { + std::lock_guard lock(nodes_mutex); + + if (initial_storage->container.contains(path)) + return; + + new_nodes = true; + std::cerr << "Adding expected node " << path << std::endl; + + Coordination::Requests create_ops; + + size_t pos = 1; + while (true) + { + pos = path.find('/', pos); + if (pos == std::string::npos) + break; + + auto request = zkutil::makeCreateRequest(path.substr(0, pos), "", zkutil::CreateMode::Persistent, true); + create_ops.emplace_back(request); + ++pos; + } + + auto request = zkutil::makeCreateRequest(path, "", zkutil::CreateMode::Persistent, true); + create_ops.emplace_back(request); + + auto next_zxid = initial_storage->getNextZXID(); + + static Coordination::ACLs default_acls = [] + { + Coordination::ACL acl; + acl.permissions = Coordination::ACL::All; + acl.scheme = "world"; + acl.id = "anyone"; + return Coordination::ACLs{std::move(acl)}; + }(); + + auto multi_create_request = std::make_shared(create_ops, default_acls); + initial_storage->preprocessRequest(multi_create_request, 1, 0, next_zxid, /* check_acl = */ false); + auto responses = initial_storage->processRequest(multi_create_request, 1, next_zxid, /* check_acl = */ false); + if (responses.size() > 1 || responses[0].response->error != Coordination::Error::ZOK) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Invalid response after trying to create a node {}", responses[0].response->error); + } + + void generateSnapshot() + { + std::lock_guard lock(nodes_mutex); + if (!new_nodes) + { + std::cerr << "No new nodes added" << std::endl; + return; + } + + std::cerr << "Generating snapshot with starting data" << std::endl; + DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(initial_storage->getZXID(), 1, std::make_shared()); + DB::KeeperStorageSnapshot snapshot(initial_storage.get(), snapshot_meta); + snapshot_manager->serializeSnapshotToDisk(snapshot); + + new_nodes = false; + } + + std::mutex nodes_mutex; + DB::KeeperContextPtr keeper_context; + Coordination::KeeperStoragePtr initial_storage; + std::unordered_set nodes_created_during_replay; + std::optional snapshot_manager; + bool new_nodes = false; +}; + +void dumpStats(std::string_view type, const RequestFromLogStats::Stats & stats_for_type) +{ + std::cerr << fmt::format( + "{} 
requests: {} total, {} with unexpected results ({:.4}%)", + type, + stats_for_type.total, + stats_for_type.unexpected_results, + stats_for_type.total != 0 ? static_cast(stats_for_type.unexpected_results) / stats_for_type.total * 100 : 0.0) + << std::endl; +}; + +void requestFromLogExecutor(std::shared_ptr> queue, RequestFromLogStats & request_stats) +{ + RequestFromLog request_from_log; + std::optional> last_request; + while (queue->pop(request_from_log)) + { + auto request_promise = std::make_shared>(); + last_request = request_promise->get_future(); + Coordination::ResponseCallback callback = [&, + request_promise, + request = request_from_log.request, + expected_result = request_from_log.expected_result, + subrequest_expected_results = std::move(request_from_log.subrequest_expected_results)]( + const Coordination::Response & response) mutable + { + auto & stats = request->isReadRequest() ? request_stats.read_requests : request_stats.write_requests; + + stats.total.fetch_add(1, std::memory_order_relaxed); + + if (expected_result) + { + if (*expected_result != response.error) + stats.unexpected_results.fetch_add(1, std::memory_order_relaxed); + +#if 0 + if (*expected_result != response.error) + { + std::cerr << fmt::format( + "Unexpected result for {}\ngot {}, expected {}\n", request->toString(), response.error, *expected_result) + << std::endl; + + if (const auto * multi_response = dynamic_cast(&response)) + { + std::string subresponses; + for (size_t i = 0; i < multi_response->responses.size(); ++i) + { + subresponses += fmt::format("{} = {}\n", i, multi_response->responses[i]->error); + } + + std::cerr << "Subresponses\n" << subresponses << std::endl; + } + } +#endif + } + + request_promise->set_value(); + }; + + Coordination::WatchCallbackPtr watch; + if (request_from_log.has_watch) + watch = std::make_shared([](const Coordination::WatchResponse &) {}); + + request_from_log.connection->executeGenericRequest(request_from_log.request, callback, watch); + } + + if (last_request) + last_request->wait(); +} + +} + +void Runner::runBenchmarkFromLog() +{ + std::cerr << fmt::format("Running benchmark using requests from {}", input_request_log) << std::endl; + + pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, concurrency); + + shared_context = DB::Context::createShared(); + global_context = DB::Context::createGlobal(shared_context.get()); + global_context->makeGlobalContext(); + DB::registerFormats(); + + /// Randomly choosing connection index + pcg64 rng(randomSeed()); + std::uniform_int_distribution connection_distribution(0, connection_infos.size() - 1); + + std::unordered_map> zookeeper_connections; + auto get_zookeeper_connection = [&](int64_t session_id) + { + if (auto it = zookeeper_connections.find(session_id); it != zookeeper_connections.end() && !it->second->isExpired()) + return it->second; + + auto connection_idx = connection_distribution(rng); + auto zk_connection = getConnection(connection_infos[connection_idx], connection_idx); + zookeeper_connections.insert_or_assign(session_id, zk_connection); + return zk_connection; + }; + + RequestFromLogStats stats; + + std::optional setup_nodes_collector; + if (!setup_nodes_snapshot_path.empty()) + setup_nodes_collector.emplace(setup_nodes_snapshot_path); + + std::unordered_map>> executor_id_to_queue; + + SCOPE_EXIT_SAFE({ + for (const auto & [executor_id, executor_queue] : executor_id_to_queue) + executor_queue->finish(); + + pool->wait(); + + + if (setup_nodes_collector) 
+ { + setup_nodes_collector->generateSnapshot(); + } + else + { + dumpStats("Write", stats.write_requests); + dumpStats("Read", stats.read_requests); + } + }); + + auto push_request = [&](RequestFromLog request) + { + if (auto it = executor_id_to_queue.find(request.executor_id); it != executor_id_to_queue.end()) + { + auto success = it->second->push(std::move(request)); + if (!success) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Failed to push to the executor's queue"); + return; + } + + auto executor_queue = std::make_shared>(std::numeric_limits::max()); + executor_id_to_queue.emplace(request.executor_id, executor_queue); + auto scheduled = pool->trySchedule([&, executor_queue]() mutable + { + requestFromLogExecutor(std::move(executor_queue), stats); + }); + + if (!scheduled) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Failed to schedule worker, try to increase concurrency parameter"); + + auto success = executor_queue->push(std::move(request)); + if (!success) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Failed to push to the executor's queue"); + }; + + if (!setup_nodes_collector) + { + auto setup_connection = getConnection(connection_infos[0], 0); + benchmark_context.startup(*setup_connection); + } + + ZooKeeperRequestFromLogReader request_reader(input_request_log, global_context); + + delay_watch.restart(); + while (auto request_from_log = request_reader.getNextRequest()) + { + if (setup_nodes_collector) + { + setup_nodes_collector->processRequest(*request_from_log); + } + else + { + request_from_log->connection = get_zookeeper_connection(request_from_log->session_id); + push_request(std::move(*request_from_log)); + } + + if (delay > 0 && delay_watch.elapsedSeconds() > delay) + { + if (setup_nodes_collector) + setup_nodes_collector->generateSnapshot(); + else + { + dumpStats("Write", stats.write_requests); + dumpStats("Read", stats.read_requests); + std::cerr << std::endl; + } + delay_watch.restart(); + } + } +} + +void Runner::runBenchmarkWithGenerator() +{ + pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, concurrency); + queue.emplace(concurrency); createConnections(); std::cerr << "Preparing to run\n"; + benchmark_context.startup(*connections[0]); generator->startup(*connections[0]); std::cerr << "Prepared\n"; @@ -341,7 +1149,7 @@ void Runner::runBenchmark() for (size_t i = 0; i < concurrency; ++i) { auto thread_connections = connections; - pool->scheduleOrThrowOnError([this, connections_ = std::move(thread_connections)]() mutable { thread(connections_); }); + pool->scheduleOrThrowOnError([this, my_connections = std::move(thread_connections)]() mutable { thread(my_connections); }); } } catch (...) @@ -458,8 +1266,232 @@ std::vector> Runner::refreshConnections Runner::~Runner() { - queue->clearAndFinish(); + if (queue) + queue->clearAndFinish(); shutdown = true; - pool->wait(); - generator->cleanup(*connections[0]); + + if (pool) + pool->wait(); + + try + { + auto connection = getConnection(connection_infos[0], 0); + benchmark_context.cleanup(*connection); + } + catch (...) 
+ { + DB::tryLogCurrentException("While trying to clean nodes"); + } +} + +namespace +{ + +void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & path) +{ + namespace fs = std::filesystem; + + auto promise = std::make_shared>(); + auto future = promise->get_future(); + + Strings children; + auto list_callback = [promise, &children] (const Coordination::ListResponse & response) + { + children = response.names; + promise->set_value(); + }; + zookeeper.list(path, Coordination::ListRequestType::ALL, list_callback, nullptr); + future.get(); + + std::span children_span(children); + while (!children_span.empty()) + { + Coordination::Requests ops; + for (size_t i = 0; i < 1000 && !children.empty(); ++i) + { + removeRecursive(zookeeper, fs::path(path) / children.back()); + ops.emplace_back(zkutil::makeRemoveRequest(fs::path(path) / children_span.back(), -1)); + children_span = children_span.subspan(0, children_span.size() - 1); + } + auto multi_promise = std::make_shared>(); + auto multi_future = multi_promise->get_future(); + + auto multi_callback = [multi_promise] (const Coordination::MultiResponse &) + { + multi_promise->set_value(); + }; + zookeeper.multi(ops, multi_callback); + multi_future.get(); + } + auto remove_promise = std::make_shared>(); + auto remove_future = remove_promise->get_future(); + + auto remove_callback = [remove_promise] (const Coordination::RemoveResponse &) + { + remove_promise->set_value(); + }; + + zookeeper.remove(path, -1, remove_callback); + remove_future.get(); +} + +} + +void BenchmarkContext::initializeFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + Coordination::ACL acl; + acl.permissions = Coordination::ACL::All; + acl.scheme = "world"; + acl.id = "anyone"; + default_acls.emplace_back(std::move(acl)); + + std::cerr << "---- Parsing setup ---- " << std::endl; + static const std::string setup_key = "setup"; + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(setup_key, keys); + for (const auto & key : keys) + { + if (key.starts_with("node")) + { + auto node_key = setup_key + "." + key; + auto parsed_root_node = parseNode(node_key, config); + const auto node = root_nodes.emplace_back(parsed_root_node); + + if (config.has(node_key + ".repeat")) + { + if (!node->name.isRandom()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key); + + auto repeat_count = config.getUInt64(node_key + ".repeat"); + node->repeat_count = repeat_count; + for (size_t i = 1; i < repeat_count; ++i) + root_nodes.emplace_back(node->clone()); + } + + std::cerr << "Tree to create:" << std::endl; + + node->dumpTree(); + std::cerr << std::endl; + } + } + std::cerr << "---- Done parsing data setup ----\n" << std::endl; +} + +std::shared_ptr BenchmarkContext::parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + auto node = std::make_shared(); + node->name = StringGetter::fromConfig(key + ".name", config); + + if (config.has(key + ".data")) + node->data = StringGetter::fromConfig(key + ".data", config); + + Poco::Util::AbstractConfiguration::Keys node_keys; + config.keys(key, node_keys); + + for (const auto & node_key : node_keys) + { + if (!node_key.starts_with("node")) + continue; + + const auto node_key_string = key + "." 
+ node_key; + auto child_node = parseNode(node_key_string, config); + node->children.push_back(child_node); + + if (config.has(node_key_string + ".repeat")) + { + if (!child_node->name.isRandom()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key_string); + + auto repeat_count = config.getUInt64(node_key_string + ".repeat"); + child_node->repeat_count = repeat_count; + for (size_t i = 1; i < repeat_count; ++i) + node->children.push_back(child_node); + } + } + + return node; +} + +void BenchmarkContext::Node::dumpTree(int level) const +{ + std::string data_string + = data.has_value() ? fmt::format("{}", data->description()) : "no data"; + + std::string repeat_count_string = repeat_count != 0 ? fmt::format(", repeated {} times", repeat_count) : ""; + + std::cerr << fmt::format("{}name: {}, data: {}{}", std::string(level, '\t'), name.description(), data_string, repeat_count_string) << std::endl; + + for (auto it = children.begin(); it != children.end();) + { + const auto & child = *it; + child->dumpTree(level + 1); + std::advance(it, child->repeat_count != 0 ? child->repeat_count : 1); + } +} + +std::shared_ptr BenchmarkContext::Node::clone() const +{ + auto new_node = std::make_shared(); + new_node->name = name; + new_node->data = data; + new_node->repeat_count = repeat_count; + + // don't do deep copy of children because we will do clone only for root nodes + new_node->children = children; + + return new_node; +} + +void BenchmarkContext::Node::createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const +{ + auto path = std::filesystem::path(parent_path) / name.getString(); + auto promise = std::make_shared>(); + auto future = promise->get_future(); + auto create_callback = [promise] (const Coordination::CreateResponse & response) + { + if (response.error != Coordination::Error::ZOK) + promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); + else + promise->set_value(); + }; + zookeeper.create(path, data ? 
data->getString() : "", false, false, acls, create_callback); + future.get(); + + for (const auto & child : children) + child->createNode(zookeeper, path, acls); +} + +void BenchmarkContext::startup(Coordination::ZooKeeper & zookeeper) +{ + if (root_nodes.empty()) + return; + + std::cerr << "---- Creating test data ----" << std::endl; + for (const auto & node : root_nodes) + { + auto node_name = node->name.getString(); + node->name.setString(node_name); + + std::string root_path = std::filesystem::path("/") / node_name; + std::cerr << "Cleaning up " << root_path << std::endl; + removeRecursive(zookeeper, root_path); + + node->createNode(zookeeper, "/", default_acls); + } + std::cerr << "---- Created test data ----\n" << std::endl; +} + +void BenchmarkContext::cleanup(Coordination::ZooKeeper & zookeeper) +{ + if (root_nodes.empty()) + return; + + std::cerr << "---- Cleaning up test data ----" << std::endl; + for (const auto & node : root_nodes) + { + auto node_name = node->name.getString(); + std::string root_path = std::filesystem::path("/") / node_name; + std::cerr << "Cleaning up " << root_path << std::endl; + removeRecursive(zookeeper, root_path); + } } diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h index 4f4a75e6ecf..c19a4d82898 100644 --- a/utils/keeper-bench/Runner.h +++ b/utils/keeper-bench/Runner.h @@ -1,5 +1,5 @@ #pragma once -#include "Common/ZooKeeper/ZooKeeperConstants.h" +#include "Common/ZooKeeper/ZooKeeperArgs.h" #include #include "Generator.h" #include @@ -12,6 +12,7 @@ #include #include +#include "Interpreters/Context.h" #include "Stats.h" #include @@ -19,12 +20,42 @@ using Ports = std::vector; using Strings = std::vector; +struct BenchmarkContext +{ +public: + void initializeFromConfig(const Poco::Util::AbstractConfiguration & config); + + void startup(Coordination::ZooKeeper & zookeeper); + void cleanup(Coordination::ZooKeeper & zookeeper); + +private: + struct Node + { + StringGetter name; + std::optional data; + std::vector> children; + size_t repeat_count = 0; + + std::shared_ptr clone() const; + + void createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const; + void dumpTree(int level = 0) const; + }; + + static std::shared_ptr parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config); + + std::vector> root_nodes; + Coordination::ACLs default_acls; +}; + class Runner { public: Runner( std::optional concurrency_, const std::string & config_path, + const std::string & input_request_log_, + const std::string & setup_nodes_snapshot_path_, const Strings & hosts_strings_, std::optional max_time_, std::optional delay_, @@ -44,8 +75,31 @@ public: ~Runner(); private: + struct ConnectionInfo + { + std::string host; + + bool secure = false; + int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; + int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS; + int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; + bool use_compression = false; + + size_t sessions = 1; + }; + void parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config); + void runBenchmarkWithGenerator(); + void runBenchmarkFromLog(); + + void createConnections(); + std::vector> refreshConnections(); + std::shared_ptr getConnection(const ConnectionInfo & connection_info, size_t connection_info_idx); + + std::string input_request_log; + std::string setup_nodes_snapshot_path; + size_t concurrency = 1; std::optional pool; @@ -54,7 
+108,8 @@ private: double max_time = 0; double delay = 1; bool continue_on_error = false; - std::atomic max_iterations = 0; + size_t max_iterations = 0; + std::atomic requests_executed = 0; std::atomic shutdown = false; @@ -71,25 +126,14 @@ private: using Queue = ConcurrentBoundedQueue; std::optional queue; - struct ConnectionInfo - { - std::string host; - - bool secure = false; - int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; - int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS; - int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; - bool use_compression = false; - - size_t sessions = 1; - }; - std::mutex connection_mutex; + ConnectionInfo default_connection_info; std::vector connection_infos; std::vector> connections; std::unordered_map connections_to_info_map; - void createConnections(); - std::shared_ptr getConnection(const ConnectionInfo & connection_info, size_t connection_info_idx); - std::vector> refreshConnections(); + DB::SharedContextHolder shared_context; + DB::ContextMutablePtr global_context; + + BenchmarkContext benchmark_context; }; diff --git a/utils/keeper-bench/main.cpp b/utils/keeper-bench/main.cpp index 0753d66850f..0b963abf406 100644 --- a/utils/keeper-bench/main.cpp +++ b/utils/keeper-bench/main.cpp @@ -1,8 +1,6 @@ #include #include #include "Runner.h" -#include "Stats.h" -#include "Generator.h" #include "Common/Exception.h" #include #include @@ -27,6 +25,10 @@ int main(int argc, char *argv[]) bool print_stacktrace = true; + //Poco::AutoPtr channel(new Poco::ConsoleChannel(std::cerr)); + //Poco::Logger::root().setChannel(channel); + //Poco::Logger::root().setLevel("trace"); + try { using boost::program_options::value; @@ -34,12 +36,14 @@ int main(int argc, char *argv[]) boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); desc.add_options() ("help", "produce help message") - ("config", value()->default_value(""), "yaml/xml file containing configuration") - ("concurrency,c", value(), "number of parallel queries") - ("report-delay,d", value(), "delay between intermediate reports in seconds (set 0 to disable reports)") - ("iterations,i", value(), "amount of queries to be executed") - ("time-limit,t", value(), "stop launch of queries after specified time limit") - ("hosts,h", value()->multitoken()->default_value(Strings{}, ""), "") + ("config", value()->default_value(""), "yaml/xml file containing configuration") + ("input-request-log", value()->default_value(""), "log of requests that will be replayed") + ("setup-nodes-snapshot-path", value()->default_value(""), "directory containing snapshots with starting state") + ("concurrency,c", value(), "number of parallel queries") + ("report-delay,d", value(), "delay between intermediate reports in seconds (set 0 to disable reports)") + ("iterations,i", value(), "amount of queries to be executed") + ("time-limit,t", value(), "stop launch of queries after specified time limit") + ("hosts,h", value()->multitoken()->default_value(Strings{}, ""), "") ("continue_on_errors", "continue testing even if a query fails") ; @@ -56,6 +60,8 @@ int main(int argc, char *argv[]) Runner runner(valueToOptional(options["concurrency"]), options["config"].as(), + options["input-request-log"].as(), + options["setup-nodes-snapshot-path"].as(), options["hosts"].as(), valueToOptional(options["time-limit"]), valueToOptional(options["report-delay"]), @@ -66,9 +72,9 @@ int main(int argc, char *argv[]) { 
runner.runBenchmark(); } - catch (const DB::Exception & e) + catch (...) { - std::cout << "Got exception while trying to run benchmark: " << e.message() << std::endl; + std::cout << "Got exception while trying to run benchmark: " << DB::getCurrentExceptionMessage(true) << std::endl; } return 0;
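
---

The following is a standalone sketch (not part of the patch) of the replay execution model that `Runner::runBenchmarkFromLog` and `requestFromLogExecutor` introduce above: each session/executor id gets its own queue and worker, every request is sent asynchronously, and the worker keeps only the future of the most recently submitted request, blocking on it once at the end. The names `FakeRequest`, `sendAsync` and `executor` are hypothetical stand-ins; the real code uses `ConcurrentBoundedQueue`, `ThreadPool` and asynchronous ZooKeeper callbacks, and pushes into the queues while the workers are already running rather than pre-filling them.

```cpp
// Simplified, self-contained illustration of the per-session replay pattern.
// Plain std::thread / std::future stand in for ThreadPool and ConcurrentBoundedQueue.
#include <cstdint>
#include <future>
#include <iostream>
#include <map>
#include <optional>
#include <queue>
#include <string>
#include <thread>
#include <vector>

struct FakeRequest { int64_t session_id; std::string path; };

/// Pretend to send a request asynchronously and return a future for its completion.
std::future<void> sendAsync(const FakeRequest & request)
{
    return std::async(std::launch::async, [request]
    {
        std::cout << "session " << request.session_id << " -> " << request.path << '\n';
    });
}

/// One worker per session: submit every request, remember only the latest future.
void executor(std::queue<FakeRequest> queue)
{
    std::optional<std::future<void>> last_request;
    while (!queue.empty())
    {
        last_request = sendAsync(queue.front());   // fire and keep only the most recent future
        queue.pop();
    }
    if (last_request)
        last_request->wait();                      // block once, after everything was submitted
}

int main()
{
    std::vector<FakeRequest> log = {{1, "/a"}, {2, "/b"}, {1, "/a/c"}};

    // Group the replayed requests by "executor id" (the session id in the real code).
    std::map<int64_t, std::queue<FakeRequest>> per_session;
    for (const auto & request : log)
        per_session[request.session_id].push(request);

    std::vector<std::thread> workers;
    for (auto & entry : per_session)
        workers.emplace_back(executor, std::move(entry.second));
    for (auto & worker : workers)
        worker.join();
}
```

Waiting only on the last future appears to rely on responses of a single session arriving in request order, so one wait per queue is enough to know all earlier requests have completed; the sketch mirrors that by overwriting `last_request` on every submission instead of blocking per request.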